stephmnt commited on
Commit
46f9144
·
verified ·
1 Parent(s): 6197f01

Sync from GitHub Actions

Browse files
.env.example ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ POSTGRES_USER=sete_admin
2
+ POSTGRES_PASSWORD=sete_password
3
+ POSTGRES_DB=elections
4
+ POSTGRES_PORT=5432
5
+ POSTGRES_HOST=localhost
6
+ # Option directe si vous préférez définir l'URL complète :
7
+ # DATABASE_URL=postgresql+psycopg2://sete_admin:sete_password@localhost:5432/elections
.gitignore ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Spécifique à ce projet
2
+ .DS_Store
3
+ *.code-workspace
4
+ *.pdf
5
+ /output/
6
+ questions.md
7
+ /reports/
8
+ /data/external/
9
+ /data/raw/
10
+ /datasets/
11
+ /data/processed/
12
+ /data/contours-france-entiere-latest-v2.geojson
13
+ data/interim/*
14
+ !data/interim/elections_long.parquet
15
+ runtime.txt
16
+ /logs/
17
+ .vscode
18
+ supports/
19
+ # Hugging Face
20
+ .hf/
21
+ .huggingface/
22
+ # vim
23
+ *.swp
24
+ *.swo
25
+
26
+ ## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
27
+
28
+ # Byte-compiled / optimized / DLL files
29
+ __pycache__/
30
+ *.py[cod]
31
+ *$py.class
32
+
33
+ # C extensions
34
+ *.so
35
+
36
+ # Distribution / packaging
37
+ .Python
38
+ build/
39
+ develop-eggs/
40
+ dist/
41
+ downloads/
42
+ eggs/
43
+ .eggs/
44
+ lib/
45
+ lib64/
46
+ parts/
47
+ sdist/
48
+ var/
49
+ wheels/
50
+ share/python-wheels/
51
+ *.egg-info/
52
+ .installed.cfg
53
+ *.egg
54
+ MANIFEST
55
+
56
+ # PyInstaller
57
+ # Usually these files are written by a python script from a template
58
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
59
+ *.manifest
60
+ *.spec
61
+
62
+ # Installer logs
63
+ pip-log.txt
64
+ pip-delete-this-directory.txt
65
+
66
+ # Unit test / coverage reports
67
+ htmlcov/
68
+ .tox/
69
+ .nox/
70
+ .coverage
71
+ .coverage.*
72
+ .cache
73
+ nosetests.xml
74
+ coverage.xml
75
+ *.cover
76
+ *.py,cover
77
+ .hypothesis/
78
+ .pytest_cache/
79
+ cover/
80
+
81
+ # Translations
82
+ *.mo
83
+ *.pot
84
+
85
+ # Django stuff:
86
+ *.log
87
+ local_settings.py
88
+ db.sqlite3
89
+ db.sqlite3-journal
90
+
91
+ # Flask stuff:
92
+ instance/
93
+ .webassets-cache
94
+
95
+ # Scrapy stuff:
96
+ .scrapy
97
+
98
+ # PyBuilder
99
+ .pybuilder/
100
+ target/
101
+
102
+ # Jupyter Notebook
103
+ .ipynb_checkpoints
104
+
105
+ # IPython
106
+ profile_default/
107
+ ipython_config.py
108
+
109
+ # pyenv
110
+ # For a library or package, you might want to ignore these files since the code is
111
+ # intended to run in multiple environments; otherwise, check them in:
112
+ # .python-version
113
+
114
+ # pipenv
115
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
116
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
117
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
118
+ # install all needed dependencies.
119
+ #Pipfile.lock
120
+
121
+ # UV
122
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
123
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
124
+ # commonly ignored for libraries.
125
+ #uv.lock
126
+
127
+ # poetry
128
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
129
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
130
+ # commonly ignored for libraries.
131
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
132
+ #poetry.lock
133
+
134
+ # pdm
135
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
136
+ #pdm.lock
137
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
138
+ # in version control.
139
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
140
+ .pdm.toml
141
+ .pdm-python
142
+ .pdm-build/
143
+
144
+ # pixi
145
+ # pixi.lock should be committed to version control for reproducibility
146
+ # .pixi/ contains the environments and should not be committed
147
+ .pixi/
148
+
149
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
150
+ __pypackages__/
151
+
152
+ # Celery stuff
153
+ celerybeat-schedule
154
+ celerybeat.pid
155
+
156
+ # SageMath parsed files
157
+ *.sage.py
158
+
159
+ # Environments
160
+ .env
161
+ .venv
162
+ env/
163
+ venv/
164
+ ENV/
165
+ env.bak/
166
+ venv.bak/
167
+
168
+ # Spyder project settings
169
+ .spyderproject
170
+ .spyproject
171
+
172
+ # Rope project settings
173
+ .ropeproject
174
+
175
+ # mypy
176
+ .mypy_cache/
177
+ .dmypy.json
178
+ dmypy.json
179
+
180
+ # Pyre type checker
181
+ .pyre/
182
+
183
+ # pytype static type analyzer
184
+ .pytype/
185
+
186
+ # Cython debug symbols
187
+ cython_debug/
188
+
189
+ # PyCharm
190
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
191
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
192
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
193
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
194
+ #.idea/
195
+
196
+ # Ruff stuff:
197
+ .ruff_cache/
198
+
199
+ # PyPI configuration file
200
+ .pypirc
README.md CHANGED
@@ -9,4 +9,340 @@ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  pinned: false
10
  ---
11
 
12
+ # Elections Sète - Prévision municipales
13
+
14
+ Pipeline complet pour harmoniser les données électorales, construire un dataset panel sans fuite temporelle, entraîner des modèles multi-blocs, charger l'historique dans PostgreSQL et exposer des résultats via Gradio.
15
+
16
+ ## Installation
17
+ - Python 3.10+ recommandé.
18
+ - `python3 -m venv .venv && source .venv/bin/activate`
19
+ - `pip install -r requirements.txt`
20
+
21
+ ## Fichiers YAML (configuration)
22
+ ### `config/communes.yaml`
23
+ Ce fichier définit **les communes à inclure** (codes INSEE). Il est consommé par le pipeline (`src.pipeline.run_full_pipeline`) pour filtrer les données au niveau commune.
24
+
25
+ Formats acceptés (les codes sont normalisés en 5 chiffres) :
26
+ ```yaml
27
+ communes:
28
+ "34301": "Sète"
29
+ "34172": "Frontignan"
30
+ ```
31
+ ou
32
+ ```yaml
33
+ communes:
34
+ - code_insee: "34301"
35
+ nom: "Sète"
36
+ - "34172"
37
+ ```
38
+ Si tu modifies ce fichier, il faut **relancer le pipeline** pour régénérer les données filtrées.
39
+
40
+ ### `config/raw_sources.yaml`
41
+ Description des fichiers bruts et de leur structure (colonnes, séparateur, métadonnées).
42
+ C'est **le point d'entrée** pour ajouter un nouveau CSV au pipeline.
43
+
44
+ Exemple (copie d'une élection précédente + ajustements) :
45
+ ```yaml
46
+ 24_L_T1.csv:
47
+ copy_from: 22_L_T1.csv
48
+ date_scrutin: "2024-06-30"
49
+ code_bv_cols: ["Code commune", "Code BV"]
50
+ rename_map:
51
+ Nuance Liste: code_candidature
52
+ Libellé Abrégé Liste: nom_candidature
53
+ ```
54
+
55
+ ### `config/nuances.yaml`
56
+ Mapping des nuances vers les blocs politiques (avec overrides).
57
+ Par défaut, le mapping CSV historique est réutilisé et on peut **surcharger** ou **ajouter** des nuances :
58
+ ```yaml
59
+ base_mapping: data/mapping_candidats_blocs.csv
60
+ overrides:
61
+ - code_candidature: "XYZ"
62
+ nom_candidature: "Exemple"
63
+ blocs: [gauche_modere, centre]
64
+ ```
65
+
66
+ ### `docker-compose.yml`
67
+ Fichier YAML pour démarrer PostgreSQL (et éventuellement pgAdmin). Utilisé par :
68
+ ```bash
69
+ docker-compose up -d postgres
70
+ docker-compose --profile admin up
71
+ ```
72
+
73
+ ## 1. Prétraitement (harmonisation)
74
+ ```bash
75
+ # Harmonisation des CSV bruts -> data/interim/elections_long.parquet
76
+ python -m src.data.preprocess --raw-dir data/raw --output-dir data/interim
77
+ ```
78
+ Par défaut, le prétraitement lit `config/raw_sources.yaml`. Tu peux surcharger via `--meta-config`.
79
+
80
+ ## 2. Pipeline communes + features (optionnel mais recommandé si tu filtres par communes)
81
+ Le pipeline applique le filtre `config/communes.yaml` et génère `data/processed/elections_blocs.*`.
82
+ À lancer depuis un notebook ou un petit script :
83
+ ```bash
84
+ python3 - <<'PY'
85
+ from pathlib import Path
86
+ from src.pipeline import run_full_pipeline
87
+
88
+ run_full_pipeline(
89
+ elections_long_path=Path("data/interim/elections_long.parquet"),
90
+ mapping_path=Path("config/nuances.yaml"),
91
+ output_dir=Path("data/processed"),
92
+ target_communes_path=Path("config/communes.yaml"),
93
+ )
94
+ PY
95
+ ```
96
+
97
+ ## 3. Construction du panel (features + cibles)
98
+ ```bash
99
+ python -m src.features.build_features \
100
+ --elections-long data/interim/elections_long.parquet \
101
+ --mapping config/nuances.yaml \
102
+ --output data/processed/panel.parquet
103
+ ```
104
+ Le dictionnaire de données est généré dans `data/processed/data_dictionary.md`.
105
+
106
+ Note : `src.features.build_features` **ne filtre pas** via `config/communes.yaml`. Si tu veux limiter l'entraînement à certaines communes, filtre `elections_long` en amont ou adapte le pipeline.
107
+
108
+ ## 4. Base PostgreSQL
109
+ ```bash
110
+ cp .env.example .env
111
+ docker-compose up -d postgres # pgAdmin en option: `docker-compose --profile admin up`
112
+
113
+ # Ingestion du panel dans le schéma normalisé
114
+ python -m src.db.ingest --input data/processed/panel.parquet
115
+ ```
116
+ Le schéma est défini dans `src/db/schema.py`.
117
+
118
+ ## 5. Entraînement & évaluation
119
+ Commande demandée (CV stricte par scrutin) :
120
+ ```bash
121
+ python3 -m src.model.train --cv-splits 4 --models hist_gradient_boosting
122
+ ```
123
+
124
+ Options principales :
125
+ - `--panel` : chemin du panel (`data/processed/panel.parquet` par défaut).
126
+ - `--models-dir` / `--reports-dir` : sorties modèles et rapports.
127
+ - `--train-end-year`, `--valid-end-year`, `--test-start-year` : split temporel.
128
+ - `--cv-splits` : nb de folds temporels (par scrutin).
129
+ - `--no-tune` : désactive la grille d'hyperparamètres.
130
+ - `--max-trials` : limite le nombre d'essais par modèle.
131
+ - `--models` : liste de modèles à tester (ex: `ridge`, `hist_gradient_boosting`, `lightgbm`, `xgboost`, `two_stage_hgb`, `catboost`).
132
+
133
+ Sorties :
134
+ - Modèle + preprocessor : `models/<nom>.joblib` et `models/feature_columns.json`
135
+ - Modèle sélectionné : `models/best_model.json`
136
+ - Rapport métriques : `reports/metrics.json` et `reports/metrics.md`
137
+ - CV détaillée : `reports/cv_summary.csv`
138
+ - Figure : `reports/figures/mae_per_category.png`
139
+ - Model card : `models/model_card.md`
140
+
141
+ ## 6. Génération de prédictions hors ligne
142
+ ```bash
143
+ python -m src.model.predict \
144
+ --model-path models/hist_gradient_boosting.joblib \
145
+ --target-election-type municipales \
146
+ --target-year 2026 \
147
+ --commune-code 34301
148
+ # -> predictions/pred_municipales_2026_sete.csv
149
+ ```
150
+ Cette commande produit des **parts (%)** et des deltas vs législatives et municipales 2020.
151
+
152
+ ## 7. Application Gradio
153
+ ```bash
154
+ python -m app.gradio_app
155
+ ```
156
+ Comportement :
157
+ - Backend PostgreSQL si disponible, sinon fallback fichiers locaux.
158
+ - **Historique** : consultation bureau par bureau (pas de ML).
159
+ - **Prédiction** : parts par bloc converties en **comptes** (personnes) + `blancs`, `nuls`, `abstentions`.
160
+ - `inscrits` peut être fourni par l'utilisateur (sinon valeur historique la plus récente du bureau).
161
+ - Cibles proposées : municipales 2026 (tour 1), législatives 2027 (tour 1), présidentielles 2027 (tour 1).
162
+
163
+ ## Structure des données
164
+ - Configurations : `config/`
165
+ - Bruts : `data/raw/`
166
+ - Long harmonisé : `data/interim/elections_long.parquet`
167
+ - Élections blocs (filtrées) : `data/processed/elections_blocs.parquet`
168
+ - Stats communales par scrutin : `data/processed/commune_event_stats.parquet`
169
+ - Panel features+cibles : `data/processed/panel.parquet`
170
+ - Mapping nuances -> catégories : `config/nuances.yaml` (base: `data/mapping_candidats_blocs.csv`)
171
+
172
+ ## Notes
173
+ - Aucune fuite temporelle : les features sont calculées uniquement sur des scrutins strictement antérieurs à la cible.
174
+ - Les parts sont clipées à [0, 1] puis renormalisées.
175
+ - Les blancs/nuls dépendent des colonnes disponibles dans l'historique ; si une source ne les fournit pas, ils seront à 0.
176
+
177
+ ## Inventaire des fichiers (snapshot)
178
+ Statuts :
179
+ - `actif` : utilisé par le pipeline actuel.
180
+ - `généré` : produit par le pipeline/entraînement (recréable).
181
+ - `hérité (début projet)` : ancien fichier ou prototype.
182
+ - `optionnel` : utile mais non requis au runtime.
183
+ - `système (inutile)` : métadonnées OS.
184
+
185
+ | Fichier | Fonction | Statut |
186
+ |---|---|---|
187
+ | `.DS_Store` | Métadonnées macOS | système (inutile) |
188
+ | `.env.example` | Template des variables d'environnement (DB) | actif |
189
+ | `.gitignore` | Règles gitignore | actif |
190
+ | `Elections_Sete.code-workspace` | Config VSCode (workspace) | optionnel |
191
+ | `README.md` | Documentation projet | actif |
192
+ | `app/__init__.py` | Package app (init) | actif |
193
+ | `app/app.py` | Ancienne app Gradio (bv_features.parquet) | hérité (début projet) |
194
+ | `app/gradio_app.py` | Application Gradio principale | actif |
195
+ | `app.py` | Ancienne interface Gradio (compute_predictions) | hérité (début projet) |
196
+ | `catboost_info/catboost_training.json` | Artefacts CatBoost (logs/metrics) | généré |
197
+ | `catboost_info/learn/events.out.tfevents` | Artefacts CatBoost (logs/metrics) | généré |
198
+ | `catboost_info/learn_error.tsv` | Artefacts CatBoost (logs/metrics) | généré |
199
+ | `catboost_info/time_left.tsv` | Artefacts CatBoost (logs/metrics) | généré |
200
+ | `config/communes.yaml` | Liste des communes cibles (codes INSEE) | actif |
201
+ | `config/nuances.yaml` | Overrides mapping nuances -> blocs | actif |
202
+ | `config/raw_sources.yaml` | Schéma des CSV bruts (meta-config) | actif |
203
+ | `data/.DS_Store` | Métadonnées macOS | système (inutile) |
204
+ | `data/contours-france-entiere-latest-v2.geojson` | Fond cartographique (geojson) | optionnel |
205
+ | `data/interim/.DS_Store` | Métadonnées macOS | système (inutile) |
206
+ | `data/interim/candidates_long.parquet` | Données intermédiaires long format | généré |
207
+ | `data/interim/elections_long.csv` | Données intermédiaires long format | généré |
208
+ | `data/interim/elections_long.parquet` | Données intermédiaires long format | généré |
209
+ | `data/interim/frames_std/14_EU.parquet` | Intermédiaire standardisé par scrutin | généré |
210
+ | `data/interim/frames_std/14_MN14_T1T2.parquet` | Intermédiaire standardisé par scrutin | généré |
211
+ | `data/interim/frames_std/17_L_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
212
+ | `data/interim/frames_std/17_L_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
213
+ | `data/interim/frames_std/17_PR_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
214
+ | `data/interim/frames_std/17_PR_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
215
+ | `data/interim/frames_std/19_EU.parquet` | Intermédiaire standardisé par scrutin | généré |
216
+ | `data/interim/frames_std/20_MN_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
217
+ | `data/interim/frames_std/20_MN_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
218
+ | `data/interim/frames_std/21_DEP_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
219
+ | `data/interim/frames_std/21_DEP_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
220
+ | `data/interim/frames_std/21_REG_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
221
+ | `data/interim/frames_std/21_REG_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
222
+ | `data/interim/frames_std/22_L_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
223
+ | `data/interim/frames_std/22_L_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
224
+ | `data/interim/frames_std/22_PR_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
225
+ | `data/interim/frames_std/22_PR_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
226
+ | `data/interim/frames_std/24_EU.parquet` | Intermédiaire standardisé par scrutin | généré |
227
+ | `data/interim/harmonized/14_EU_harmonized.csv` | CSV harmonisé par scrutin | généré |
228
+ | `data/interim/harmonized/14_MN14_T1T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
229
+ | `data/interim/harmonized/17_L_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
230
+ | `data/interim/harmonized/17_L_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
231
+ | `data/interim/harmonized/17_PR_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
232
+ | `data/interim/harmonized/17_PR_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
233
+ | `data/interim/harmonized/19_EU_harmonized.csv` | CSV harmonisé par scrutin | généré |
234
+ | `data/interim/harmonized/20_MN_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
235
+ | `data/interim/harmonized/20_MN_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
236
+ | `data/interim/harmonized/21_DEP_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
237
+ | `data/interim/harmonized/21_DEP_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
238
+ | `data/interim/harmonized/21_REG_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
239
+ | `data/interim/harmonized/21_REG_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
240
+ | `data/interim/harmonized/22_L_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
241
+ | `data/interim/harmonized/22_L_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
242
+ | `data/interim/harmonized/22_PR_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
243
+ | `data/interim/harmonized/22_PR_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
244
+ | `data/interim/harmonized/24_EU_harmonized.csv` | CSV harmonisé par scrutin | généré |
245
+ | `data/interim/unmapped_nuances.csv` | Données intermédiaires long format | généré |
246
+ | `data/mapping_candidats_blocs.csv` | Mapping nuances -> blocs (base) | actif |
247
+ | `data/mappings/category_mapping.csv` | Copie/variante de mapping | hérité (début projet) |
248
+ | `data/processed/bv_features.parquet` | Features legacy (utilisées par app/app.py) | hérité (début projet) |
249
+ | `data/processed/data_dictionary.md` | Dictionnaire de données généré | généré (doc) |
250
+ | `data/processed/elections_blocs.csv` | Dataset blocs (filtré communes) | généré (utilisé) |
251
+ | `data/processed/elections_blocs.parquet` | Dataset blocs (filtré communes) | généré (utilisé) |
252
+ | `data/processed/history_cache.parquet` | Cache local (historique/prédictions) | généré (cache) |
253
+ | `data/processed/panel.csv` | Panel features+cibles | généré (utilisé) |
254
+ | `data/processed/panel.parquet` | Panel features+cibles | généré (utilisé) |
255
+ | `data/processed/predictions_cache.parquet` | Cache local (historique/prédictions) | généré (cache) |
256
+ | `data/processed/predictions_municipales_2026.csv` | Exports de prédictions | généré (résultats) |
257
+ | `data/processed/predictions_municipales_2026_blocs.csv` | Exports de prédictions | généré (résultats) |
258
+ | `data/processed/predictions_municipales_sete_2026.csv` | Exports de prédictions | généré (résultats) |
259
+ | `data/raw/14_EU.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
260
+ | `data/raw/14_MN14_T1T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
261
+ | `data/raw/17_L_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
262
+ | `data/raw/17_L_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
263
+ | `data/raw/17_PR_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
264
+ | `data/raw/17_PR_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
265
+ | `data/raw/19_EU.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
266
+ | `data/raw/20_MN_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
267
+ | `data/raw/20_MN_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
268
+ | `data/raw/21_DEP_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
269
+ | `data/raw/21_DEP_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
270
+ | `data/raw/21_REG_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
271
+ | `data/raw/21_REG_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
272
+ | `data/raw/22_L_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
273
+ | `data/raw/22_L_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
274
+ | `data/raw/22_PR_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
275
+ | `data/raw/22_PR_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
276
+ | `data/raw/24_EU.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
277
+ | `data/raw/24_L_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
278
+ | `data/raw/24_L_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
279
+ | `datasets/.DS_Store` | Métadonnées macOS | système (inutile) |
280
+ | `datasets/14_EU.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
281
+ | `datasets/14_MN14_T1T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
282
+ | `datasets/17_L_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
283
+ | `datasets/17_L_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
284
+ | `datasets/17_PR_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
285
+ | `datasets/17_PR_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
286
+ | `datasets/19_EU.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
287
+ | `datasets/20_MN_T1.tsv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
288
+ | `datasets/20_MN_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
289
+ | `datasets/21_DEP_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
290
+ | `datasets/21_DEP_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
291
+ | `datasets/21_REG_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
292
+ | `datasets/21_REG_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
293
+ | `datasets/22_L_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
294
+ | `datasets/22_L_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
295
+ | `datasets/22_PR_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
296
+ | `datasets/22_PR_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
297
+ | `datasets/24_EU.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
298
+ | `datasets/24_L_T1T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
299
+ | `datasets/24_L_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
300
+ | `docker-compose.yml` | Services Docker (PostgreSQL/pgAdmin) | actif |
301
+ | `harmoniser.md` | Notes d'harmonisation | optionnel |
302
+ | `main.py` | Orchestrateur pipeline (CLI utilitaire) | optionnel |
303
+ | `mission.md` | Backlog / notes projet | optionnel |
304
+ | `models/best_model.json` | Nom du meilleur modèle | généré (utilisé) |
305
+ | `models/feature_columns.json` | Liste des features du modèle | généré (utilisé) |
306
+ | `models/hist_gradient_boosting.joblib` | Modèle entraîné | généré (utilisé) |
307
+ | `models/model_card.md` | Model card (synthèse) | généré (doc) |
308
+ | `notebooks/01_pretraitement.ipynb` | Notebook d'analyse / exploration | optionnel (exploration) |
309
+ | `notebooks/02_feature_engineering.ipynb` | Notebook d'analyse / exploration | optionnel (exploration) |
310
+ | `notebooks/03_modelisation_prediction.ipynb` | Notebook d'analyse / exploration | optionnel (exploration) |
311
+ | `notebooks/aed.ipynb` | Notebook d'analyse / exploration | optionnel (exploration) |
312
+ | `notebooks/catboost_info/catboost_training.json` | Artefacts CatBoost (notebook) | généré |
313
+ | `notebooks/catboost_info/learn/events.out.tfevents` | Artefacts CatBoost (notebook) | généré |
314
+ | `notebooks/catboost_info/learn_error.tsv` | Artefacts CatBoost (notebook) | généré |
315
+ | `notebooks/catboost_info/time_left.tsv` | Artefacts CatBoost (notebook) | généré |
316
+ | `output/.DS_Store` | Métadonnées macOS | système (inutile) |
317
+ | `output/Sans titre 2.png` | Exports graphiques | hérité (début projet) |
318
+ | `output/Sans titre 3.png` | Exports graphiques | hérité (début projet) |
319
+ | `output/Sans titre 4.png` | Exports graphiques | hérité (début projet) |
320
+ | `output/Sans titre 5.png` | Exports graphiques | hérité (début projet) |
321
+ | `output/Sans titre 6.png` | Exports graphiques | hérité (début projet) |
322
+ | `output/Sans titre.png` | Exports graphiques | hérité (début projet) |
323
+ | `output/output.png` | Exports graphiques | hérité (début projet) |
324
+ | `predictions/pred_municipales_2026_sete.csv` | Exports de prédictions | généré (résultats) |
325
+ | `reports/colonnes_comparatif.csv` | Rapport / métriques | généré |
326
+ | `reports/cv_summary.csv` | Rapport / métriques | généré |
327
+ | `reports/figures/mae_per_category.png` | Figures de rapports | généré |
328
+ | `reports/metrics.json` | Rapport / métriques | généré |
329
+ | `reports/metrics.md` | Rapport / note analytique | généré (doc) |
330
+ | `reports/notebook_audit.md` | Rapport / note analytique | généré (doc) |
331
+ | `requirements.txt` | Dépendances Python | actif |
332
+ | `src/__init__.py` | Package src (init) | actif |
333
+ | `src/constants.py` | Constantes projet | actif |
334
+ | `src/data/__init__.py` | Module data | actif |
335
+ | `src/data/preprocess.py` | Prétraitement/harmonisation | actif |
336
+ | `src/data_prep.py` | Librairie d'harmonisation des données | actif |
337
+ | `src/database.py` | Accès base SQL (fallback/app) | actif |
338
+ | `src/db/__init__.py` | Module DB | actif |
339
+ | `src/db/ingest.py` | Ingestion PostgreSQL | actif |
340
+ | `src/db/schema.py` | Schéma PostgreSQL | actif |
341
+ | `src/features/__init__.py` | Module features | actif |
342
+ | `src/features/build_features.py` | Construction du panel features+cibles | actif |
343
+ | `src/model/predict.py` | Prédiction hors ligne | actif |
344
+ | `src/model/train.py` | Entraînement + CV | actif |
345
+ | `src/pipeline.py` | Pipeline de construction (blocs + stats) | actif |
346
+ | `src/prediction.py` | Prédiction legacy (app.py) | hérité (début projet) |
347
+ | `supports/Plan-2024_Bureaux-de-vote.pdf` | Documents de référence | optionnel |
348
+ | `supports/zonages_admin_canton.pdf` | Documents de référence | optionnel |
app.py CHANGED
@@ -1,7 +1,22 @@
1
- import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
 
3
+ import importlib.util
4
+ from pathlib import Path
5
 
6
+
7
+ def _load_gradio_module():
8
+ module_path = Path(__file__).resolve().parent / "app" / "gradio_app.py"
9
+ spec = importlib.util.spec_from_file_location("gradio_app_module", module_path)
10
+ if spec is None or spec.loader is None:
11
+ raise RuntimeError(f"Impossible de charger {module_path}")
12
+ module = importlib.util.module_from_spec(spec)
13
+ spec.loader.exec_module(module)
14
+ return module
15
+
16
+
17
+ _gradio = _load_gradio_module()
18
+ demo = _gradio.create_interface()
19
+
20
+
21
+ if __name__ == "__main__":
22
+ demo.launch(server_name="0.0.0.0", server_port=7860)
app/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Gradio application package.
3
+ """
app/app.py ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+
6
+ # =========================
7
+ # Chargement des données
8
+ # =========================
9
+
10
+ DATA_PATH = "data/processed/bv_features.parquet"
11
+
12
+ df = pd.read_parquet(DATA_PATH)
13
+ df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce") # type: ignore
14
+ df["tour"] = pd.to_numeric(df.get("tour"), errors="coerce").astype("Int64") # type: ignore
15
+
16
+ # -------------------------
17
+ # Filtrage Sète uniquement
18
+ # -------------------------
19
+ # Hypothèse : code_commune INSEE
20
+ SETE_CODE_INSEE = "34301"
21
+
22
+ def resolve_code_commune(df_in: pd.DataFrame) -> tuple[pd.DataFrame, str | None]:
23
+ df_out = df_in.copy()
24
+ if "code_commune" in df_out.columns:
25
+ df_out["code_commune"] = df_out["code_commune"].astype("string")
26
+ return df_out, None
27
+ if "Code de la commune" in df_out.columns:
28
+ df_out = df_out.rename(columns={"Code de la commune": "code_commune"})
29
+ df_out["code_commune"] = df_out["code_commune"].astype("string")
30
+ return df_out, None
31
+ if "code_bv" in df_out.columns:
32
+ df_out["code_commune"] = df_out["code_bv"].astype(str).str.slice(0, 5)
33
+ df_out["code_commune"] = df_out["code_commune"].astype("string")
34
+ valid = df_out["code_commune"].str.len() == 5
35
+ if not valid.any():
36
+ return df_out, "Impossible de dériver code_commune depuis code_bv (format inattendu)."
37
+ return df_out, None
38
+ df_out["code_commune"] = pd.NA
39
+ return df_out, "Aucune colonne commune disponible (code_commune/Code de la commune/code_bv)."
40
+
41
+
42
+ df, commune_warning = resolve_code_commune(df)
43
+ df["code_commune"] = (
44
+ df["code_commune"]
45
+ .astype(str)
46
+ .str.replace(".0", "", regex=False)
47
+ .str.replace(r"\D", "", regex=True)
48
+ .str.zfill(5)
49
+ .astype("string")
50
+ )
51
+ df_sete = df[df["code_commune"] == SETE_CODE_INSEE].copy()
52
+ df_sete["tour"] = pd.to_numeric(df_sete["tour"], errors="coerce").astype("Int64")
53
+
54
+ # Colonnes blocs
55
+ BASE_BLOCS = [
56
+ "droite_modere",
57
+ "gauche_modere",
58
+ "gauche_dure",
59
+ "droite_dure",
60
+ "centre",
61
+ "extreme_gauche",
62
+ "extreme_droite",
63
+ "autre",
64
+ ]
65
+ BLOC_LABELS = [b for b in BASE_BLOCS if f"part_bloc_{b}" in df_sete.columns]
66
+ BLOC_COLS = [f"part_bloc_{b}" for b in BLOC_LABELS]
67
+
68
+ # =========================
69
+ # Fonctions métier
70
+ # =========================
71
+
72
+ def compute_national_reference(df_all, type_scrutin, tour):
73
+ """
74
+ Calcule les parts nationales par bloc pour un scrutin et un tour donnés.
75
+ """
76
+ if not BLOC_COLS:
77
+ return {}
78
+ df_nat = df_all[
79
+ (df_all["type_scrutin"] == type_scrutin)
80
+ & (df_all["tour"] == tour)
81
+ ]
82
+
83
+ # pondération par exprimés
84
+ weights = df_nat["exprimes"].replace(0, np.nan)
85
+
86
+ national = {}
87
+ for col in BLOC_COLS:
88
+ national[col] = np.nansum(df_nat[col] * weights) / np.nansum(weights)
89
+
90
+ return national
91
+
92
+
93
+ def table_sete(type_scrutin, tour):
94
+ if not BLOC_COLS:
95
+ return pd.DataFrame({"info": ["Colonnes part_bloc_* absentes."]})
96
+ tour_val = pd.to_numeric(tour, errors="coerce")
97
+ if pd.isna(tour_val):
98
+ return pd.DataFrame({"info": ["Tour invalide."]})
99
+ # données locales
100
+ local = df_sete[
101
+ (df_sete["type_scrutin"] == type_scrutin)
102
+ & (df_sete["tour"] == int(tour_val))
103
+ ].copy()
104
+
105
+ if local.empty:
106
+ return pd.DataFrame({"info": ["Aucune donnée disponible"]})
107
+
108
+ # référence nationale
109
+ nat = compute_national_reference(df, type_scrutin, tour)
110
+
111
+ # construction tableau affiché
112
+ rows = []
113
+
114
+ for _, row in local.iterrows():
115
+ r = {
116
+ "code_bv": row["code_bv"],
117
+ "nom_bv": row.get("nom_bv", ""),
118
+ }
119
+
120
+ for col in BLOC_COLS:
121
+ part = row[col]
122
+ ecart = part - nat.get(col, 0)
123
+
124
+ r[col.replace("part_bloc_", "")] = round(part * 100, 2)
125
+ r[col.replace("part_bloc_", "") + "_ecart_nat"] = round(ecart * 100, 2)
126
+
127
+ rows.append(r)
128
+
129
+ result = pd.DataFrame(rows)
130
+
131
+ # tri par écart extrême droite (exemple)
132
+ if "extreme_droite_ecart_nat" in result.columns:
133
+ result = result.sort_values(
134
+ "extreme_droite_ecart_nat", ascending=False
135
+ )
136
+
137
+ return result
138
+
139
+
140
+ def get_bv_timeseries(code_bv: str, tour: int | None) -> pd.DataFrame:
141
+ if df_sete.empty or not BLOC_COLS:
142
+ return pd.DataFrame(columns=["date_scrutin"] + BLOC_COLS)
143
+ subset = df_sete[df_sete["code_bv"].astype(str) == str(code_bv)].copy()
144
+ subset["tour"] = pd.to_numeric(subset["tour"], errors="coerce").astype("Int64")
145
+ if tour is not None:
146
+ subset = subset[subset["tour"] == tour]
147
+ subset = subset.dropna(subset=["date_scrutin"]).sort_values("date_scrutin")
148
+ return subset[["date_scrutin"] + BLOC_COLS]
149
+
150
+
151
+ def plot_bv_timeseries(code_bv: str, tour_choice, bloc_choices=None):
152
+ tour = None if tour_choice == "Tous" else int(tour_choice)
153
+ fig, ax = plt.subplots(figsize=(8, 4))
154
+ if not BLOC_COLS:
155
+ ax.text(0.5, 0.5, "Colonnes part_bloc_* absentes.", ha="center", va="center")
156
+ ax.axis("off")
157
+ return fig
158
+ df_ts = get_bv_timeseries(code_bv, tour)
159
+ if df_ts.empty:
160
+ tours_avail = (
161
+ df_sete[df_sete["code_bv"].astype(str) == str(code_bv)]["tour"]
162
+ .dropna()
163
+ .unique()
164
+ .tolist()
165
+ )
166
+ ax.text(
167
+ 0.5,
168
+ 0.5,
169
+ f"Aucune donnée après filtre tour={tour}. Valeurs disponibles: {sorted(tours_avail)}",
170
+ ha="center",
171
+ va="center",
172
+ wrap=True,
173
+ )
174
+ ax.axis("off")
175
+ return fig
176
+
177
+ selected = bloc_choices or BLOC_LABELS
178
+ selected_cols = [f"part_bloc_{b}" for b in selected if f"part_bloc_{b}" in df_ts.columns]
179
+ if not selected_cols:
180
+ ax.text(0.5, 0.5, "Aucun bloc sélectionné.", ha="center", va="center")
181
+ ax.axis("off")
182
+ return fig
183
+ for col in selected_cols:
184
+ ax.plot(df_ts["date_scrutin"], df_ts[col], label=col.replace("part_bloc_", ""))
185
+ ax.set_title(f"Évolution politique – BV {code_bv}")
186
+ ax.set_ylabel("Part des voix (exprimés)")
187
+ ax.grid(True, alpha=0.3)
188
+ ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left", borderaxespad=0, fontsize=8)
189
+ fig.autofmt_xdate()
190
+ fig.tight_layout()
191
+ return fig
192
+
193
+
194
+ # =========================
195
+ # Interface Gradio
196
+ # =========================
197
+
198
+ def format_bv_label(code_bv: str) -> str:
199
+ code_str = str(code_bv)
200
+ if code_str.isdigit() and code_str.startswith(SETE_CODE_INSEE) and len(code_str) == 9:
201
+ bureau_num = code_str[-4:]
202
+ return f"BV {int(bureau_num)} ({code_str})"
203
+ return code_str
204
+
205
+
206
+ bv_values = (
207
+ sorted(df_sete["code_bv"].astype(str).unique().tolist())
208
+ if "code_bv" in df_sete.columns
209
+ else []
210
+ )
211
+ bv_choices = [(format_bv_label(code), code) for code in bv_values]
212
+ scrutins = sorted(df_sete["type_scrutin"].unique())
213
+ tours = sorted(df_sete["tour"].dropna().unique())
214
+ tour_options = ["Tous"] + [str(t) for t in tours]
215
+ status_messages = []
216
+ if commune_warning:
217
+ status_messages.append(commune_warning)
218
+ if df_sete.empty:
219
+ status_messages.append(
220
+ "Aucune ligne pour la commune 34301 (Sète). Vérifie `code_commune` / le filtre."
221
+ )
222
+ if not BLOC_COLS:
223
+ status_messages.append("Colonnes part_bloc_* absentes dans bv_features.")
224
+ missing_blocs = [f"part_bloc_{b}" for b in BASE_BLOCS if f"part_bloc_{b}" not in df_sete.columns]
225
+ if missing_blocs:
226
+ status_messages.append(f"Colonnes blocs manquantes: {', '.join(missing_blocs)}")
227
+ tour_dtype = str(df_sete["tour"].dtype) if "tour" in df_sete.columns else "n/a"
228
+ tour_sample = sorted(df_sete["tour"].dropna().unique().tolist())[:10]
229
+ status_messages.append(f"tour dtype: {tour_dtype}")
230
+ status_messages.append(f"tours disponibles (échantillon): {tour_sample}")
231
+ status_messages.append(
232
+ f"df_sete: {len(df_sete)} lignes, {df_sete['code_bv'].nunique() if 'code_bv' in df_sete.columns else 0} BV"
233
+ )
234
+ status_messages.append(f"blocs actifs: {', '.join(BLOC_LABELS) if BLOC_LABELS else 'aucun'}")
235
+ status_text = "\n".join(f"- {msg}" for msg in status_messages)
236
+
237
+ with gr.Blocks(title="Résultats électoraux – Bureaux de vote de Sète") as app:
238
+ gr.Markdown(
239
+ """
240
+ # 🗳️ Résultats électoraux – Ville de Sète
241
+
242
+ **Bureaux de vote uniquement – comparaison au niveau national**
243
+
244
+ Les pourcentages sont exprimés en **% des exprimés**.
245
+ Les écarts sont en **points par rapport au national**.
246
+ """
247
+ )
248
+ if status_text:
249
+ gr.Markdown(f"**Alertes**\n{status_text}")
250
+
251
+ with gr.Tabs():
252
+ with gr.Tab("Bureaux de vote"):
253
+ with gr.Row():
254
+ type_scrutin = gr.Dropdown(
255
+ scrutins,
256
+ label="Type de scrutin",
257
+ value=scrutins[0] if scrutins else None,
258
+ )
259
+ tour = gr.Dropdown(
260
+ tours,
261
+ label="Tour",
262
+ value=tours[0] if tours else None,
263
+ )
264
+
265
+ output = gr.Dataframe(
266
+ label="Bureaux de vote – parts locales et écart au national",
267
+ interactive=False,
268
+ wrap=True,
269
+ )
270
+
271
+ btn = gr.Button("Afficher")
272
+
273
+ btn.click(
274
+ fn=table_sete,
275
+ inputs=[type_scrutin, tour],
276
+ outputs=output,
277
+ )
278
+
279
+ with gr.Tab("Évolution temporelle"):
280
+ bv_selector = gr.Dropdown(
281
+ bv_choices,
282
+ label="Bureau de vote",
283
+ value=bv_values[0] if bv_values else None,
284
+ )
285
+ tour_selector = gr.Dropdown(
286
+ tour_options,
287
+ label="Tour",
288
+ value="Tous",
289
+ )
290
+ blocs_selector = gr.Dropdown(
291
+ BLOC_LABELS,
292
+ label="Blocs à afficher",
293
+ value=BLOC_LABELS,
294
+ multiselect=True,
295
+ )
296
+ plot = gr.Plot(
297
+ value=plot_bv_timeseries(
298
+ bv_values[0] if bv_values else "", "Tous", BLOC_LABELS
299
+ )
300
+ )
301
+
302
+ bv_selector.change(
303
+ fn=plot_bv_timeseries,
304
+ inputs=[bv_selector, tour_selector, blocs_selector],
305
+ outputs=plot,
306
+ )
307
+ tour_selector.change(
308
+ fn=plot_bv_timeseries,
309
+ inputs=[bv_selector, tour_selector, blocs_selector],
310
+ outputs=plot,
311
+ )
312
+ blocs_selector.change(
313
+ fn=plot_bv_timeseries,
314
+ inputs=[bv_selector, tour_selector, blocs_selector],
315
+ outputs=plot,
316
+ )
317
+
318
+ # =========================
319
+ # Lancement
320
+ # =========================
321
+ # Tests manuels:
322
+ # 1) Lancer l'app.
323
+ # 2) Onglet "Évolution temporelle": choisir un BV, tester Tous / Tour 1 / Tour 2.
324
+ # 3) Vérifier que la légende n'occulte pas les courbes et que seuls 8 blocs apparaissent.
325
+ # 4) Vérifier le libellé BV (BV X + code) et les alertes en haut de page.
326
+
327
+ if __name__ == "__main__":
328
+ app.launch()
app/gradio_app.py ADDED
@@ -0,0 +1,1645 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import io
5
+ import json
6
+ import logging
7
+ import re
8
+ import warnings
9
+ from html import escape
10
+ from pathlib import Path
11
+ from typing import Dict, Tuple
12
+
13
+ import gradio as gr
14
+ import joblib
15
+ import numpy as np
16
+ import pandas as pd
17
+ import sqlalchemy as sa
18
+
19
+ from src.constants import CANDIDATE_CATEGORIES
20
+ from src.db.schema import get_engine
21
+ from src.features.build_features import (
22
+ aggregate_by_event,
23
+ compute_national_reference,
24
+ expand_by_category,
25
+ load_elections_long,
26
+ load_mapping,
27
+ )
28
+
29
+ LOGGER = logging.getLogger(__name__)
30
+ COMMUNE_CODE_SETE = "34301"
31
+ MODEL_DIR = Path("models")
32
+ FEATURE_COLS_PATH = MODEL_DIR / "feature_columns.json"
33
+ RESIDUAL_INTERVALS_PATH = Path("reports/residual_intervals.json")
34
+ GEO_DIR = Path("data/geo")
35
+ DEFAULT_TARGETS = [
36
+ ("municipales", 2026),
37
+ ("legislatives", 2027),
38
+ ("presidentielles", 2027),
39
+ ]
40
+ FEATURE_CACHE: Dict[Tuple[str, int], Tuple[pd.DataFrame, Dict[str, Dict[Tuple[str, str], float]]]] = {}
41
+ ELECTION_KEY_SEP = "|"
42
+ ELECTION_TYPE_LABELS = {
43
+ "municipales": "Municipales",
44
+ "legislatives": "Législatives",
45
+ "presidentielles": "Présidentielles",
46
+ "europeennes": "Européennes",
47
+ "regionales": "Régionales",
48
+ "departementales": "Départementales",
49
+ }
50
+ HISTORY_OUTPUT_COLUMNS = ["categorie", "score_%"]
51
+ PREDICTION_OUTPUT_COLUMNS = ["categorie", "nombre"]
52
+ INTERVAL_OUTPUT_COLUMNS = ["categorie", "baseline_%", "min_%", "max_%", "baseline", "min", "max"]
53
+ SIM_OUTPUT_COLUMNS = ["categorie", "baseline", "apres_transfert", "delta"]
54
+ OPPORTUNITY_OUTPUT_COLUMNS = [
55
+ "bureau",
56
+ "gain_cible",
57
+ "score_base",
58
+ "score_apres",
59
+ "top_base",
60
+ "top_apres",
61
+ "bascule",
62
+ ]
63
+ DISPLAY_CATEGORY_ORDER = [
64
+ "extreme_gauche",
65
+ "gauche_dure",
66
+ "gauche_modere",
67
+ "centre",
68
+ "droite_modere",
69
+ "droite_dure",
70
+ "extreme_droite",
71
+ ]
72
+ PREDICTION_CATEGORY_ORDER = DISPLAY_CATEGORY_ORDER + ["blancs", "nuls", "abstention"]
73
+ DISPLAY_CATEGORY_LABELS = {
74
+ "extreme_gauche": "extrême-gauche",
75
+ "gauche_dure": "gauche dure",
76
+ "gauche_modere": "gauche modérée",
77
+ "centre": "centre",
78
+ "droite_modere": "droite modérée",
79
+ "droite_dure": "droite dure",
80
+ "extreme_droite": "extrême-droite",
81
+ "blancs": "blancs",
82
+ "nuls": "nuls",
83
+ "abstention": "abstentions",
84
+ }
85
+ DISPLAY_CATEGORY_COLORS = {
86
+ "extreme_gauche": "#7f1d1d",
87
+ "gauche_dure": "#dc2626",
88
+ "gauche_modere": "#f472b6",
89
+ "centre": "#facc15",
90
+ "droite_modere": "#60a5fa",
91
+ "droite_dure": "#1e3a8a",
92
+ "extreme_droite": "#111827",
93
+ }
94
+ EXTRA_CATEGORY_COLORS = {
95
+ "blancs": "#e5e7eb",
96
+ "nuls": "#9ca3af",
97
+ "abstention": "#6b7280",
98
+ }
99
+ DISPLAY_LABEL_COLORS = {
100
+ DISPLAY_CATEGORY_LABELS[key]: color for key, color in DISPLAY_CATEGORY_COLORS.items()
101
+ }
102
+ DISPLAY_LABEL_COLORS.update(
103
+ {DISPLAY_CATEGORY_LABELS[key]: color for key, color in EXTRA_CATEGORY_COLORS.items()}
104
+ )
105
+ CATEGORY_LABEL_TO_KEY = {label: key for key, label in DISPLAY_CATEGORY_LABELS.items()}
106
+ TRANSFER_CATEGORY_LABELS = [DISPLAY_CATEGORY_LABELS[key] for key in PREDICTION_CATEGORY_ORDER]
107
+ DEFAULT_RESIDUAL_SPREAD = 0.03
108
+ INTERVAL_BANDS = {
109
+ "80% (p10-p90)": ("q10", "q90"),
110
+ "90% (p05-p95)": ("q05", "q95"),
111
+ }
112
+ NEUTRAL_MARGIN_SHARE = 0.10
113
+
114
+ try:
115
+ from numpy import RankWarning as NP_RANK_WARNING # type: ignore[attr-defined]
116
+ except Exception:
117
+ class NP_RANK_WARNING(UserWarning):
118
+ pass
119
+
120
+
121
+ def ordered_categories() -> list[str]:
122
+ return [cat for cat in DISPLAY_CATEGORY_ORDER if cat in CANDIDATE_CATEGORIES]
123
+
124
+
125
+ def load_residual_intervals(path: Path = RESIDUAL_INTERVALS_PATH) -> Dict[str, object]:
126
+ if not path.exists():
127
+ return {}
128
+ try:
129
+ payload = json.loads(path.read_text())
130
+ except Exception:
131
+ return {}
132
+ if isinstance(payload, dict):
133
+ return payload
134
+ return {}
135
+
136
+
137
+ def get_interval_bounds(
138
+ residuals: Dict[str, Dict[str, float]],
139
+ category: str,
140
+ band_label: str,
141
+ ) -> Tuple[float, float]:
142
+ keys = INTERVAL_BANDS.get(band_label, ("q10", "q90"))
143
+ cat_resid = residuals.get(category, {})
144
+ low = cat_resid.get(keys[0])
145
+ high = cat_resid.get(keys[1])
146
+ if low is None or high is None:
147
+ return -DEFAULT_RESIDUAL_SPREAD, DEFAULT_RESIDUAL_SPREAD
148
+ return float(low), float(high)
149
+
150
+
151
+ def build_interval_table(
152
+ shares_by_cat: Dict[str, float],
153
+ exprimes_total: int,
154
+ residuals: Dict[str, Dict[str, float]],
155
+ band_label: str,
156
+ ) -> pd.DataFrame:
157
+ rows = []
158
+ for cat in ordered_categories():
159
+ share = float(shares_by_cat.get(cat, 0.0))
160
+ low_resid, high_resid = get_interval_bounds(residuals, cat, band_label)
161
+ share_low = float(np.clip(share + low_resid, 0.0, 1.0))
162
+ share_high = float(np.clip(share + high_resid, 0.0, 1.0))
163
+ count = int(round(share * exprimes_total))
164
+ count_low = int(round(share_low * exprimes_total))
165
+ count_high = int(round(share_high * exprimes_total))
166
+ if count_low > count_high:
167
+ count_low, count_high = count_high, count_low
168
+ share_low, share_high = share_high, share_low
169
+ rows.append(
170
+ {
171
+ "categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat),
172
+ "baseline_%": round(share * 100, 1),
173
+ "min_%": round(share_low * 100, 1),
174
+ "max_%": round(share_high * 100, 1),
175
+ "baseline": count,
176
+ "min": count_low,
177
+ "max": count_high,
178
+ }
179
+ )
180
+ return pd.DataFrame(rows, columns=INTERVAL_OUTPUT_COLUMNS)
181
+
182
+
183
+ def build_interval_chart(
184
+ df: pd.DataFrame,
185
+ *,
186
+ value_col: str = "baseline",
187
+ low_col: str = "min",
188
+ high_col: str = "max",
189
+ color_map: Dict[str, str] | None = None,
190
+ ylabel: str = "Nombre d'électeurs",
191
+ ):
192
+ try:
193
+ import matplotlib.pyplot as plt
194
+ except Exception:
195
+ return None
196
+ if df.empty or value_col not in df.columns:
197
+ return None
198
+ labels = df["categorie"].astype(str).tolist()
199
+ values = df[value_col].astype(float).to_numpy()
200
+ low_vals = df[low_col].astype(float).to_numpy()
201
+ high_vals = df[high_col].astype(float).to_numpy()
202
+ lower_err = np.maximum(0.0, values - low_vals)
203
+ upper_err = np.maximum(0.0, high_vals - values)
204
+ yerr = np.vstack([lower_err, upper_err])
205
+ colors = [color_map.get(label, "#3b82f6") for label in labels] if color_map else "#3b82f6"
206
+ plt.figure(figsize=(6, 3))
207
+ plt.bar(labels, values, color=colors, yerr=yerr, capsize=4)
208
+ plt.xticks(rotation=30, ha="right")
209
+ plt.ylabel(ylabel)
210
+ plt.tight_layout()
211
+ return plt
212
+
213
+
214
+ def apply_transfers(
215
+ counts: Dict[str, int],
216
+ total_inscrits: int,
217
+ transfers: list[Tuple[str, str, float]],
218
+ ) -> Dict[str, int]:
219
+ updated = {key: int(value) for key, value in counts.items()}
220
+ for source, target, delta_pct in transfers:
221
+ if delta_pct <= 0 or source == target:
222
+ continue
223
+ delta_count = int(round(total_inscrits * float(delta_pct) / 100.0))
224
+ if delta_count <= 0:
225
+ continue
226
+ available = max(0, int(updated.get(source, 0)))
227
+ moved = min(available, delta_count)
228
+ updated[source] = available - moved
229
+ updated[target] = int(updated.get(target, 0)) + moved
230
+ return updated
231
+
232
+
233
+ def build_simulation_table(
234
+ baseline: Dict[str, int],
235
+ updated: Dict[str, int],
236
+ ) -> pd.DataFrame:
237
+ rows = []
238
+ for cat in PREDICTION_CATEGORY_ORDER:
239
+ base = int(baseline.get(cat, 0))
240
+ new = int(updated.get(cat, 0))
241
+ rows.append(
242
+ {
243
+ "categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat),
244
+ "baseline": base,
245
+ "apres_transfert": new,
246
+ "delta": new - base,
247
+ }
248
+ )
249
+ return pd.DataFrame(rows, columns=SIM_OUTPUT_COLUMNS)
250
+
251
+
252
+ def load_geojson_features(geo_dir: Path = GEO_DIR) -> list[dict]:
253
+ if not geo_dir.exists():
254
+ return []
255
+ paths = sorted(geo_dir.glob("*.geojson")) + sorted(geo_dir.glob("*.json"))
256
+ features: list[dict] = []
257
+ for path in paths:
258
+ try:
259
+ payload = json.loads(path.read_text())
260
+ except Exception:
261
+ continue
262
+ if isinstance(payload, dict):
263
+ features.extend(payload.get("features", []))
264
+ return features
265
+
266
+
267
+ def extract_bureau_number(label: str | None) -> int | None:
268
+ if not label:
269
+ return None
270
+ match = re.search(r"(\d+)", str(label))
271
+ if not match:
272
+ return None
273
+ try:
274
+ return int(match.group(1))
275
+ except ValueError:
276
+ return None
277
+
278
+
279
+ def match_bureau_code(commune_code: str, bureau_num: int, available_codes: set[str]) -> str:
280
+ padded = str(bureau_num).zfill(4)
281
+ candidates = [f"{commune_code}-{padded}", f"{commune_code}{padded}"]
282
+ for candidate in candidates:
283
+ if candidate in available_codes:
284
+ return candidate
285
+ return candidates[-1]
286
+
287
+
288
+ def _iter_coords(geom: dict) -> list[Tuple[float, float]]:
289
+ coords = []
290
+ geom_type = geom.get("type")
291
+ if geom_type == "Polygon":
292
+ for ring in geom.get("coordinates", []):
293
+ coords.extend([(lon, lat) for lon, lat in ring])
294
+ elif geom_type == "MultiPolygon":
295
+ for polygon in geom.get("coordinates", []):
296
+ for ring in polygon:
297
+ coords.extend([(lon, lat) for lon, lat in ring])
298
+ return coords
299
+
300
+
301
+ def geojson_bounds(features: list[dict]) -> Tuple[Tuple[float, float], Tuple[float, float]] | None:
302
+ lons = []
303
+ lats = []
304
+ for feature in features:
305
+ geom = feature.get("geometry") or {}
306
+ for lon, lat in _iter_coords(geom):
307
+ lons.append(lon)
308
+ lats.append(lat)
309
+ if not lons or not lats:
310
+ return None
311
+ return (min(lats), min(lons)), (max(lats), max(lons))
312
+
313
+
314
+ def build_prediction_table_from_counts(counts_by_cat: Dict[str, int]) -> pd.DataFrame:
315
+ rows = []
316
+ for cat in ordered_categories():
317
+ rows.append({"categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat), "nombre": int(counts_by_cat.get(cat, 0))})
318
+ for extra in ["blancs", "nuls", "abstention"]:
319
+ rows.append(
320
+ {
321
+ "categorie": DISPLAY_CATEGORY_LABELS[extra],
322
+ "nombre": int(counts_by_cat.get(extra, 0)),
323
+ }
324
+ )
325
+ return pd.DataFrame(rows, columns=PREDICTION_OUTPUT_COLUMNS)
326
+
327
+
328
+ def chart_base64_from_df(
329
+ df: pd.DataFrame,
330
+ value_col: str,
331
+ ylabel: str,
332
+ color_map: Dict[str, str],
333
+ ) -> str | None:
334
+ try:
335
+ import matplotlib.pyplot as plt
336
+ except Exception:
337
+ return None
338
+ if df.empty or value_col not in df.columns:
339
+ return None
340
+ labels = df["categorie"].astype(str).tolist()
341
+ values = pd.to_numeric(df[value_col], errors="coerce").fillna(0).tolist()
342
+ colors = [color_map.get(label, "#3b82f6") for label in labels]
343
+ fig, ax = plt.subplots(figsize=(4.5, 3.2))
344
+ ax.barh(labels, values, color=colors)
345
+ ax.invert_yaxis()
346
+ ax.set_xlabel(ylabel)
347
+ ax.tick_params(axis="y", labelsize=8)
348
+ fig.tight_layout()
349
+ buf = io.BytesIO()
350
+ fig.savefig(buf, format="png", dpi=150)
351
+ plt.close(fig)
352
+ return base64.b64encode(buf.getvalue()).decode("ascii")
353
+
354
+
355
+ def build_map_popup_html(
356
+ bureau_label: str,
357
+ table_df: pd.DataFrame,
358
+ chart_b64: str | None,
359
+ meta: str | None,
360
+ ) -> str:
361
+ title_html = f"<strong>{escape(bureau_label)}</strong>"
362
+ meta_html = f"<div style='margin:4px 0;'>{escape(meta)}</div>" if meta else ""
363
+ table_html = table_df.to_html(index=False, border=0)
364
+ img_html = ""
365
+ if chart_b64:
366
+ img_html = (
367
+ "<div style='margin-top:6px;'>"
368
+ f"<img src='data:image/png;base64,{chart_b64}' style='width:320px;height:auto;'/>"
369
+ "</div>"
370
+ )
371
+ return f"<div style='font-size:12px;'>{title_html}{meta_html}{table_html}{img_html}</div>"
372
+
373
+
374
+ def build_map_legend_html() -> str:
375
+ parts = []
376
+ for key in DISPLAY_CATEGORY_ORDER:
377
+ label = DISPLAY_CATEGORY_LABELS.get(key, key)
378
+ color = DISPLAY_CATEGORY_COLORS.get(key, "#9ca3af")
379
+ parts.append(
380
+ f"<span style='display:inline-flex;align-items:center;margin-right:10px;'>"
381
+ f"<span style='width:12px;height:12px;background:{color};display:inline-block;margin-right:6px;border:1px solid #111827;'></span>"
382
+ f"{escape(label)}</span>"
383
+ )
384
+ parts.append(
385
+ "<span style='display:inline-flex;align-items:center;margin-right:10px;'>"
386
+ "<span style='width:12px;height:12px;background:#ffffff;display:inline-block;margin-right:6px;border:1px solid #111827;'></span>"
387
+ "écart gauche/droite ≤ 10%</span>"
388
+ )
389
+ parts.append(
390
+ "<span style='display:inline-flex;align-items:center;margin-right:10px;'>"
391
+ "<span style='width:12px;height:12px;background:#9ca3af;display:inline-block;margin-right:6px;border:1px solid #111827;'></span>"
392
+ "données indisponibles</span>"
393
+ )
394
+ parts.append("<span style='font-size:12px;color:#6b7280;'>abstention non utilisée pour la couleur</span>")
395
+ return "<div style='margin-bottom:8px;'>" + " ".join(parts) + "</div>"
396
+
397
+
398
+ def build_bureau_map_html(
399
+ backend: "PredictorBackend",
400
+ target_type: str,
401
+ target_year: int,
402
+ ) -> str:
403
+ try:
404
+ import folium
405
+ except Exception:
406
+ return "<p>Folium n'est pas disponible. Installe-le via requirements.txt.</p>"
407
+
408
+ features = load_geojson_features()
409
+ if not features:
410
+ return "<p>Aucune geojson trouvée dans data/geo.</p>"
411
+
412
+ bounds = geojson_bounds(features)
413
+ if bounds is None:
414
+ return "<p>Impossible de calculer l'emprise de la carte.</p>"
415
+ (min_lat, min_lon), (max_lat, max_lon) = bounds
416
+ center = [(min_lat + max_lat) / 2, (min_lon + max_lon) / 2]
417
+ fmap = folium.Map(location=center, zoom_start=13, tiles="cartodbpositron")
418
+
419
+ available_codes = set(backend.available_bureaux())
420
+ for feature in features:
421
+ props = feature.get("properties", {})
422
+ label = props.get("name") or "Bureau"
423
+ bureau_num = extract_bureau_number(label)
424
+ if bureau_num is None:
425
+ code_bv = None
426
+ else:
427
+ code_bv = match_bureau_code(COMMUNE_CODE_SETE, bureau_num, available_codes)
428
+
429
+ fill_color = "#9ca3af"
430
+ popup_html = None
431
+ if code_bv is not None:
432
+ details, _, meta = backend.predict_bureau_details(code_bv, target_type, target_year)
433
+ if details is not None:
434
+ shares = details["shares_by_cat"]
435
+ left_share = float(shares.get("gauche_dure", 0.0) + shares.get("gauche_modere", 0.0))
436
+ right_share = float(shares.get("droite_dure", 0.0) + shares.get("droite_modere", 0.0))
437
+ if abs(left_share - right_share) <= NEUTRAL_MARGIN_SHARE:
438
+ fill_color = "#ffffff"
439
+ else:
440
+ winner = max(shares, key=shares.get)
441
+ fill_color = DISPLAY_CATEGORY_COLORS.get(winner, fill_color)
442
+
443
+ table_df = build_prediction_table_from_counts(details["counts"])
444
+ chart_b64 = chart_base64_from_df(
445
+ table_df,
446
+ value_col="nombre",
447
+ ylabel="Nombre d'electeurs",
448
+ color_map=DISPLAY_LABEL_COLORS,
449
+ )
450
+ popup_html = build_map_popup_html(str(label), table_df, chart_b64, meta)
451
+
452
+ def _style(_: dict, color=fill_color):
453
+ return {
454
+ "fillColor": color,
455
+ "color": "#111827",
456
+ "weight": 1,
457
+ "fillOpacity": 0.6,
458
+ }
459
+
460
+ geo = folium.GeoJson(feature, style_function=_style)
461
+ if popup_html:
462
+ geo.add_child(folium.Popup(popup_html, max_width=450))
463
+ geo.add_child(folium.Tooltip(str(label)))
464
+ geo.add_to(fmap)
465
+
466
+ fmap.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]])
467
+ return fmap._repr_html_()
468
+
469
+
470
+ def _project_rate(
471
+ series: pd.Series,
472
+ years: pd.Series,
473
+ target_year: int,
474
+ *,
475
+ min_points_trend: int = 3,
476
+ clamp_to_observed: bool = True,
477
+ ) -> float | None:
478
+ df = pd.DataFrame(
479
+ {
480
+ "value": pd.to_numeric(series, errors="coerce"),
481
+ "year": pd.to_numeric(years, errors="coerce"),
482
+ }
483
+ ).dropna()
484
+ if df.empty:
485
+ return None
486
+ values = df["value"].to_numpy()
487
+ years_arr = df["year"].to_numpy()
488
+ if len(set(years_arr)) >= min_points_trend and len(df) >= min_points_trend:
489
+ with warnings.catch_warnings():
490
+ warnings.simplefilter("ignore", category=NP_RANK_WARNING)
491
+ try:
492
+ slope, intercept = np.polyfit(years_arr, values, 1)
493
+ projected = slope * target_year + intercept
494
+ except Exception:
495
+ projected = values[-1]
496
+ else:
497
+ projected = values[-1]
498
+ if clamp_to_observed and len(values):
499
+ projected = min(max(projected, float(np.nanmin(values))), float(np.nanmax(values)))
500
+ return float(min(1.0, max(0.0, projected)))
501
+
502
+
503
+ def _allocate_counts(shares: np.ndarray, total: int) -> np.ndarray:
504
+ if total <= 0 or shares.size == 0:
505
+ return np.zeros_like(shares, dtype=int)
506
+ shares = np.clip(shares, 0, None)
507
+ if shares.sum() == 0:
508
+ return np.zeros_like(shares, dtype=int)
509
+ shares = shares / shares.sum()
510
+ raw = shares * total
511
+ floors = np.floor(raw)
512
+ remainder = int(total - floors.sum())
513
+ if remainder > 0:
514
+ order = np.argsort(-(raw - floors))
515
+ for idx in order[:remainder]:
516
+ floors[idx] += 1
517
+ return floors.astype(int)
518
+
519
+
520
def load_bureau_event_stats(commune_code: str) -> pd.DataFrame:
    """Load per-bureau participation stats for one commune.

    Tries several processed/interim files in priority order and keeps the
    first non-empty dataset that carries blancs/nuls information; if none
    does, the last non-empty dataset is used. Returns one row per
    (bureau, scrutin type, year, round) with turnout/blancs/nuls rates
    relative to registered voters, or an empty frame when nothing matched.
    """
    # Candidate files, from most to least preferred.
    candidates = [
        Path("data/processed/elections_blocs.parquet"),
        Path("data/processed/elections_blocs.csv"),
        Path("data/interim/elections_long.parquet"),
        Path("data/interim/elections_long.csv"),
    ]
    df = pd.DataFrame()
    best = pd.DataFrame()
    for path in candidates:
        if not path.exists():
            continue
        if path.suffix == ".parquet":
            df = pd.read_parquet(path)
        else:
            df = pd.read_csv(path, sep=";")
        if df.empty:
            continue
        # Harmonize column names: accept either French or English schemas.
        if "type_scrutin" not in df.columns and "election_type" in df.columns:
            df["type_scrutin"] = df["election_type"]
        if "annee" not in df.columns and "election_year" in df.columns:
            df["annee"] = df["election_year"]
        if "tour" not in df.columns and "round" in df.columns:
            df["tour"] = df["round"]
        df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce")
        # Coerce vote-count columns to numeric, creating missing ones as NaN.
        for col in ["inscrits", "votants", "blancs", "nuls"]:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors="coerce")
            else:
                df[col] = np.nan
        # Restrict to the requested commune, either via an explicit commune
        # column or via the commune prefix embedded in code_bv.
        if "code_commune" in df.columns:
            df["code_commune"] = df["code_commune"].astype(str)
            df = df[df["code_commune"] == str(commune_code)]
        else:
            df = df[df["code_bv"].astype(str).str.startswith(str(commune_code))]
        df = df.dropna(subset=["code_bv"])
        if df.empty:
            continue
        # Prefer the first dataset that actually records blancs/nuls.
        has_blancs = df["blancs"].notna().any() or df["nuls"].notna().any()
        if has_blancs:
            best = df
            break
    if best.empty:
        # No dataset had blancs/nuls: fall back to the last non-empty one.
        best = df
    df = best
    if df.empty:
        return df
    # One row per event; max() collapses per-candidate duplicates, since
    # these totals are repeated on every candidate row of the same event.
    group_cols = [col for col in ["code_bv", "type_scrutin", "annee", "tour", "date_scrutin"] if col in df.columns]
    agg = df.groupby(group_cols, as_index=False).agg(
        inscrits=("inscrits", "max"),
        votants=("votants", "max"),
        blancs=("blancs", "max"),
        nuls=("nuls", "max"),
    )
    if "date_scrutin" in agg.columns:
        agg = agg.sort_values("date_scrutin")
    agg["election_type"] = agg.get("type_scrutin")
    agg["election_type"] = agg["election_type"].astype("string").str.strip().str.lower()
    agg["election_year"] = pd.to_numeric(agg.get("annee"), errors="coerce")
    agg["round"] = pd.to_numeric(agg.get("tour"), errors="coerce").fillna(1).astype(int)
    # Zero registered voters would divide by zero; NaN the denominator instead.
    base_inscrits = agg["inscrits"].replace(0, np.nan)
    agg["turnout_pct"] = agg["votants"] / base_inscrits
    agg["blancs_pct"] = agg["blancs"] / base_inscrits
    agg["nuls_pct"] = agg["nuls"] / base_inscrits
    return agg[
        [
            "code_bv",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
            "inscrits",
            "votants",
            "blancs",
            "nuls",
            "turnout_pct",
            "blancs_pct",
            "nuls_pct",
        ]
    ]
600
+
601
+
602
def load_commune_event_stats(commune_code: str) -> pd.DataFrame:
    """Load commune-level participation stats (inscrits/votants/blancs/nuls).

    Reads the first available commune_event_stats file (parquet preferred),
    filters it to ``commune_code`` and derives turnout/blancs/nuls rates when
    they are not already present. Returns an empty frame when no file exists,
    when the file has no ``code_commune`` column, or when the commune is absent.
    """
    candidates = [
        Path("data/processed/commune_event_stats.parquet"),
        Path("data/processed/commune_event_stats.csv"),
    ]
    df = pd.DataFrame()
    for path in candidates:
        if not path.exists():
            continue
        if path.suffix == ".parquet":
            df = pd.read_parquet(path)
        else:
            df = pd.read_csv(path, sep=";")
        if not df.empty:
            break
    if df.empty:
        return df
    # Harmonize column names: accept either French or English schemas.
    if "type_scrutin" not in df.columns and "election_type" in df.columns:
        df["type_scrutin"] = df["election_type"]
    if "annee" not in df.columns and "election_year" in df.columns:
        df["annee"] = df["election_year"]
    if "tour" not in df.columns and "round" in df.columns:
        df["tour"] = df["round"]
    df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce")
    for col in ["inscrits", "votants", "blancs", "nuls"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        else:
            df[col] = np.nan
    if "code_commune" in df.columns:
        df["code_commune"] = df["code_commune"].astype(str)
        df = df[df["code_commune"] == str(commune_code)]
    else:
        # Without a commune column this dataset cannot be scoped; give up.
        return pd.DataFrame()
    if df.empty:
        return df
    # Zero registered voters would divide by zero; NaN the denominator instead.
    base_inscrits = df["inscrits"].replace(0, np.nan)
    if "turnout_pct" not in df.columns:
        df["turnout_pct"] = df["votants"] / base_inscrits
    if "blancs_pct" not in df.columns:
        df["blancs_pct"] = df["blancs"] / base_inscrits
    if "nuls_pct" not in df.columns:
        df["nuls_pct"] = df["nuls"] / base_inscrits
    df["election_type"] = df["type_scrutin"].astype("string").str.strip().str.lower()
    df["election_year"] = pd.to_numeric(df.get("annee"), errors="coerce")
    df["round"] = pd.to_numeric(df.get("tour"), errors="coerce").fillna(1).astype(int)
    return df[
        [
            "code_commune",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
            "inscrits",
            "votants",
            "blancs",
            "nuls",
            "turnout_pct",
            "blancs_pct",
            "nuls_pct",
        ]
    ]
664
+
665
+
666
def format_backend_label(backend_kind: str) -> str:
    """Return the human-readable label for a data backend kind."""
    if backend_kind == "postgres":
        return "PostgreSQL"
    return "fichiers locaux"
668
+
669
+
670
def format_election_type_label(election_type: str) -> str:
    """Display label for an election type, falling back to a title-cased name."""
    mapped = ELECTION_TYPE_LABELS.get(election_type)
    if mapped:
        return mapped
    # Unknown type: derive a readable label from the raw identifier.
    return str(election_type).replace("_", " ").title()
675
+
676
+
677
def format_election_label(
    election_type: str,
    election_year: int,
    round_num: int,
    date_scrutin: pd.Timestamp | None = None,
) -> str:
    """Build a display label such as "Municipales 2026 - Tour 1 (2026-03-15)".

    The ISO date suffix is appended only when ``date_scrutin`` is provided
    and is not NaT/NaN.
    """
    label = f"{format_election_type_label(election_type)} {election_year} - Tour {round_num}"
    if date_scrutin is not None and not pd.isna(date_scrutin):
        iso_date = pd.to_datetime(date_scrutin).date().isoformat()
        label = f"{label} ({iso_date})"
    return label
688
+
689
+
690
def format_election_key(election_type: str, election_year: int, round_num: int) -> str:
    """Serialize an election identity into a single separator-joined key."""
    parts = [str(election_type), str(election_year), str(round_num)]
    return ELECTION_KEY_SEP.join(parts)
692
+
693
+
694
def parse_election_key(key: str) -> Tuple[str, int, int]:
    """Inverse of ``format_election_key``.

    Raises
    ------
    ValueError
        If ``key`` does not contain exactly three separator-delimited parts.
    """
    pieces = key.split(ELECTION_KEY_SEP)
    if len(pieces) != 3:
        raise ValueError(f"Clé d'élection invalide: {key!r}")
    election_type, year_str, round_str = pieces
    return election_type, int(year_str), int(round_str)
699
+
700
+
701
+ def format_bureau_label(code_bv: str, bureau_label: str | None) -> str:
702
+ code = str(code_bv)
703
+ suffix = code.split("-")[-1] if "-" in code else code
704
+ if bureau_label is not None and not pd.isna(bureau_label):
705
+ label = str(bureau_label).strip()
706
+ if label and label != code:
707
+ return f"{label} ({code})"
708
+ return f"Bureau {suffix} ({code})"
709
+
710
+
711
def build_bureau_choices(history: pd.DataFrame) -> list[tuple[str, str]]:
    """Build (display label, code_bv) dropdown choices from a history frame."""
    if history.empty:
        return []
    if "bureau_label" not in history.columns:
        # No label column: derive labels from the codes alone.
        codes = sorted(history["code_bv"].dropna().unique().tolist())
        return [(format_bureau_label(code, None), code) for code in codes]
    # Keep one label per bureau (first seen after sorting by code).
    label_table = (
        history[["code_bv", "bureau_label"]]
        .dropna(subset=["code_bv"])
        .drop_duplicates()
        .sort_values("code_bv")
        .groupby("code_bv", as_index=False)["bureau_label"]
        .first()
    )
    choices: list[tuple[str, str]] = []
    for entry in label_table.itertuples(index=False):
        choices.append((format_bureau_label(entry.code_bv, entry.bureau_label), entry.code_bv))
    return choices
729
+
730
+
731
def build_history_choices(history: pd.DataFrame) -> list[tuple[str, str]]:
    """Build (display label, election key) dropdown choices from history.

    One choice per distinct (type, year, round) event, keeping the earliest
    known scrutin date for display, sorted by year then type then round.
    """
    if history.empty:
        return []
    events = (
        history[["election_type", "election_year", "round", "date_scrutin"]]
        .dropna(subset=["election_type", "election_year", "round"])
        .drop_duplicates()
        # Collapse duplicate events that differ only by date; keep min date.
        .groupby(["election_type", "election_year", "round"], as_index=False)
        .agg(date_scrutin=("date_scrutin", "min"))
        .sort_values(["election_year", "election_type", "round"])
    )
    return [
        (
            format_election_label(
                row.election_type,
                int(row.election_year),
                int(row.round),
                row.date_scrutin,
            ),
            format_election_key(row.election_type, int(row.election_year), int(row.round)),
        )
        for row in events.itertuples(index=False)
    ]
754
+
755
+
756
def clean_history_frame(history: pd.DataFrame) -> pd.DataFrame:
    """Normalize a raw history frame into a clean, typed dataset.

    - string-normalizes identifiers (category/type lowercased, stripped)
    - coerces year/round/date and clips the rate columns to [0, 1]
    - drops rows missing any key field
    - keeps only rows whose category is a known candidate category
    """
    if history.empty:
        return history
    clean = history.copy()
    clean["code_bv"] = clean["code_bv"].astype("string").str.strip()
    clean["election_type"] = clean["election_type"].astype("string").str.strip().str.lower()
    clean["category"] = clean["category"].astype("string").str.strip().str.lower()
    if "bureau_label" in clean.columns:
        clean["bureau_label"] = clean["bureau_label"].astype("string").str.strip()
    clean["election_year"] = pd.to_numeric(clean["election_year"], errors="coerce")
    # Missing round defaults to the first round.
    clean["round"] = pd.to_numeric(clean["round"], errors="coerce").fillna(1)
    clean["date_scrutin"] = pd.to_datetime(clean["date_scrutin"], errors="coerce")
    # Shares and turnout are proportions; clip out-of-range values.
    for col in ["share", "share_nat", "turnout_pct"]:
        if col in clean.columns:
            clean[col] = pd.to_numeric(clean[col], errors="coerce").clip(lower=0, upper=1)
    clean = clean.dropna(subset=["code_bv", "election_type", "election_year", "round", "category"])
    clean["election_year"] = clean["election_year"].astype(int)
    clean["round"] = clean["round"].astype(int)
    clean = clean[clean["category"].isin(CANDIDATE_CATEGORIES)]
    return clean
776
+
777
+
778
def prepare_history_table(history_slice: pd.DataFrame) -> pd.DataFrame:
    """Aggregate a history slice into the display table (one row per category).

    Every known category appears exactly once, in canonical order, with a
    zero share when it is absent from the slice; scores are percentages
    rounded to one decimal.
    """
    if history_slice.empty:
        return pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
    grouped = history_slice.groupby("category", as_index=False).agg(share=("share", "sum"))
    # Left-merge onto the canonical category order so missing ones show as 0.
    clean = pd.DataFrame({"category": ordered_categories()}).merge(grouped, on="category", how="left")
    clean["share"] = pd.to_numeric(clean["share"], errors="coerce").fillna(0).clip(lower=0, upper=1)
    clean["score_%"] = (clean["share"] * 100).round(1)
    clean["categorie"] = clean["category"].map(DISPLAY_CATEGORY_LABELS).fillna(clean["category"])
    return clean[HISTORY_OUTPUT_COLUMNS]
787
+
788
+
789
def format_history_meta(history_slice: pd.DataFrame) -> str:
    """Summarize a history slice as "Date du scrutin : … | Participation : …".

    Each part is included only when the corresponding data is available;
    an empty slice yields an empty string.
    """
    if history_slice.empty:
        return ""
    pieces: list[str] = []
    known_dates = history_slice["date_scrutin"].dropna()
    if not known_dates.empty:
        first_date = pd.to_datetime(known_dates.iloc[0]).date().isoformat()
        pieces.append(f"Date du scrutin : {first_date}")
    turnout = pd.to_numeric(history_slice["turnout_pct"], errors="coerce").dropna()
    if not turnout.empty:
        pieces.append(f"Participation : {turnout.iloc[0] * 100:.1f}%")
    return " | ".join(pieces)
801
+
802
+
803
+ def _code_bv_full(commune_code: str, bureau_code: str) -> str:
804
+ bureau_code = str(bureau_code).zfill(4)
805
+ return f"{commune_code}-{bureau_code}"
806
+
807
+
808
def load_history_from_db(commune_code: str) -> pd.DataFrame:
    """Load per-bureau historical results for a commune from PostgreSQL.

    Joins local results with bureau/commune/election/category metadata and
    the matching national shares; percentage columns are converted to
    fractions in [0, 1].

    Raises
    ------
    RuntimeError
        When the query returns no rows for ``commune_code`` (callers use
        this to fall back to the file-based loader).
    """
    engine = get_engine()
    query = sa.text(
        """
        select cm.insee_code as commune_code,
               b.bureau_code,
               b.bureau_label,
               e.election_type,
               e.election_year,
               coalesce(e.round, 1) as round,
               e.date as date_scrutin,
               c.name as category,
               rl.share_pct,
               rl.turnout_pct,
               rn.share_pct as share_nat
        from results_local rl
        join bureaux b on rl.bureau_id = b.id
        join communes cm on b.commune_id = cm.id
        join elections e on rl.election_id = e.id
        join categories c on rl.category_id = c.id
        left join results_national rn on rn.election_id = e.id and rn.category_id = rl.category_id
        where cm.insee_code = :commune
        """
    )
    df = pd.read_sql(query, engine, params={"commune": commune_code})
    if df.empty:
        raise RuntimeError("Aucune donnée dans la base pour la commune demandée.")
    # Build the canonical "<commune>-<bureau>" identifier used everywhere else.
    df["code_bv"] = df.apply(lambda r: _code_bv_full(r["commune_code"], r["bureau_code"]), axis=1)
    df["date_scrutin"] = pd.to_datetime(df["date_scrutin"])
    # DB stores percentages (0-100); the app works in fractions (0-1).
    df["share"] = pd.to_numeric(df["share_pct"], errors="coerce") / 100
    df["share_nat"] = pd.to_numeric(df["share_nat"], errors="coerce") / 100
    df["turnout_pct"] = pd.to_numeric(df["turnout_pct"], errors="coerce") / 100
    df["election_year"] = pd.to_numeric(df["election_year"], errors="coerce")
    df["round"] = pd.to_numeric(df["round"], errors="coerce").fillna(1).astype(int)
    return df[
        [
            "commune_code",
            "code_bv",
            "bureau_label",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
            "category",
            "share",
            "share_nat",
            "turnout_pct",
        ]
    ]
857
+
858
+
859
def load_history_from_files(commune_code: str) -> pd.DataFrame:
    """File-based fallback for ``load_history_from_db``.

    Rebuilds the same per-bureau history frame from the interim long-format
    dataset and the candidate-to-bloc mapping, merging in a national
    reference share per (type, year, round, category).

    Raises
    ------
    RuntimeError
        When the aggregated local dataset has no ``share`` column.
    """
    elections_long_all = load_elections_long(
        Path("data/interim/elections_long.parquet"),
        commune_code=commune_code,
    )
    mapping = load_mapping(Path("data/mapping_candidats_blocs.csv"))
    expanded_all = expand_by_category(elections_long_all, mapping)
    local_all = aggregate_by_event(expanded_all)
    nat = compute_national_reference(local_all)

    local = local_all[local_all["commune_code"] == commune_code].copy()
    local = local.merge(nat, on=["election_type", "election_year", "round", "category"], how="left")
    # Columns already in aggregate_by_event/compute_national_reference
    if "share" not in local.columns:
        raise RuntimeError("Colonne share absente du dataset local (fallback fichiers).")
    # Files carry no bureau labels; keep the column for schema parity with DB.
    local["bureau_label"] = None
    local["share_nat"] = local.get("share_nat")
    local["turnout_pct"] = local.get("turnout_pct")
    return local.rename(
        columns={
            "annee": "election_year",
            "tour": "round",
        }
    )[
        [
            "commune_code",
            "code_bv",
            "bureau_label",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
            "category",
            "share",
            "share_nat",
            "turnout_pct",
        ]
    ]
897
+
898
+
899
def references_from_history(history: pd.DataFrame, target_year: int) -> Dict[str, Dict[Tuple[str, str], float]]:
    """Extract per-(bureau, category) reference shares from past elections.

    Returns
    -------
    dict
        ``{"leg": ..., "mun2020": ...}`` where each value maps
        ``(code_bv, category)`` to the most recent observed share for,
        respectively, legislative elections and the 2020 municipal election,
        restricted to years strictly before ``target_year``.
    """
    past = history[history["election_year"] < target_year].copy()

    def _latest_shares(frame: pd.DataFrame) -> Dict[Tuple[str, str], float]:
        # Sort chronologically so .last() picks the most recent share.
        return (
            frame.sort_values("date_scrutin")
            .groupby(["code_bv", "category"])["share"]
            .last()
            .to_dict()
        )

    leg_frame = past[past["election_type"] == "legislatives"]
    mun_frame = past[(past["election_type"] == "municipales") & (past["election_year"] == 2020)]
    return {"leg": _latest_shares(leg_frame), "mun2020": _latest_shares(mun_frame)}
914
+
915
+
916
def build_features_from_history(history: pd.DataFrame, target_type: str, target_year: int) -> pd.DataFrame:
    """Build the model's per-bureau feature rows for a target election.

    Uses only history strictly before ``target_year``. For every bureau and
    candidate category it derives lag features (last share, last deviation
    to national, last same-type values, one-step swing) plus last observed
    turnout, and emits one synthetic row per bureau dated Jan 1 of the
    target year. Returns an empty frame when there is no usable history.
    """
    hist = history[history["election_year"] < target_year].copy()
    if hist.empty:
        return pd.DataFrame()
    # Chronological order so every .last() below means "most recent".
    hist = hist.sort_values("date_scrutin")
    hist["dev_to_nat"] = hist["share"] - hist["share_nat"]

    last_any_share = hist.groupby(["code_bv", "category"])["share"].last()
    last_any_dev = hist.groupby(["code_bv", "category"])["dev_to_nat"].last()
    # Same-type lags: restricted to past elections of the target type.
    last_type_share = (
        hist[hist["election_type"] == target_type]
        .groupby(["code_bv", "category"])["share"]
        .last()
    )
    last_type_dev = (
        hist[hist["election_type"] == target_type]
        .groupby(["code_bv", "category"])["dev_to_nat"]
        .last()
    )
    # Swing = change between the two most recent observations (NaN if < 2).
    swing_any = (
        hist.groupby(["code_bv", "category"])["share"]
        .apply(lambda s: s.iloc[-1] - s.iloc[-2] if len(s) >= 2 else np.nan)
        .rename("swing_any")
    )
    turnout_any = hist.groupby("code_bv")["turnout_pct"].last()
    turnout_type = (
        hist[hist["election_type"] == target_type]
        .groupby("code_bv")["turnout_pct"]
        .last()
    )

    bureaux = sorted(hist["code_bv"].dropna().unique())
    records = []
    for code_bv in bureaux:
        record = {
            # code_bv is "<commune>-<bureau>"; the prefix is the commune code.
            "commune_code": str(code_bv).split("-")[0],
            "code_bv": code_bv,
            "election_type": target_type,
            "election_year": target_year,
            "round": 1,
            # Placeholder date for the future event (actual date unknown).
            "date_scrutin": f"{target_year}-01-01",
            "prev_turnout_any_lag1": turnout_any.get(code_bv, np.nan),
            "prev_turnout_same_type_lag1": turnout_type.get(code_bv, np.nan),
        }
        for cat in CANDIDATE_CATEGORIES:
            record[f"prev_share_any_lag1_{cat}"] = last_any_share.get((code_bv, cat), np.nan)
            record[f"prev_share_type_lag1_{cat}"] = last_type_share.get((code_bv, cat), np.nan)
            record[f"prev_dev_to_national_any_lag1_{cat}"] = last_any_dev.get((code_bv, cat), np.nan)
            record[f"prev_dev_to_national_type_lag1_{cat}"] = last_type_dev.get((code_bv, cat), np.nan)
            record[f"swing_any_{cat}"] = swing_any.get((code_bv, cat), np.nan)
        records.append(record)
    return pd.DataFrame.from_records(records)
968
+
969
+
970
def load_model() -> Path:
    """Locate the trained model file to load, in priority order.

    1. The model named in ``models/best_model.json`` (if present and valid).
    2. ``hist_gradient_boosting.joblib``.
    3. The first ``*.joblib`` file (alphabetically) in the models directory.

    Raises
    ------
    FileNotFoundError
        When no ``.joblib`` file exists at all.
    """
    best_path = MODEL_DIR / "best_model.json"
    if best_path.exists():
        try:
            payload = json.loads(best_path.read_text())
            name = payload.get("name")
            if name:
                candidate = MODEL_DIR / f"{name}.joblib"
                if candidate.exists():
                    return candidate
        except Exception:
            # Corrupt/unreadable best_model.json: fall through to defaults.
            pass
    if (MODEL_DIR / "hist_gradient_boosting.joblib").exists():
        return MODEL_DIR / "hist_gradient_boosting.joblib"
    joblibs = sorted(MODEL_DIR.glob("*.joblib"))
    if not joblibs:
        raise FileNotFoundError("Aucun modèle trouvé dans models/. Lancez src/model/train.py.")
    return joblibs[0]
988
+
989
+
990
def load_feature_columns(path: Path, df: pd.DataFrame) -> list[str]:
    """Load the model's feature-column list, or derive it from ``df``.

    When ``path`` exists it is read as a JSON array; otherwise every column
    of ``df`` except the identifier/event-key columns is used.
    """
    if path.exists():
        return json.loads(path.read_text())
    id_cols = {"commune_code", "code_bv", "election_type", "election_year", "round", "date_scrutin"}
    return [col for col in df.columns if col not in id_cols]
995
+
996
+
997
def format_delta(value) -> str:
    """Format a numeric delta with an explicit leading "+" for non-negatives.

    Returns "N/A" for None or NaN; otherwise the value rounded to 1 decimal.
    """
    if value is None:
        return "N/A"
    if isinstance(value, float) and np.isnan(value):
        return "N/A"
    prefix = "+" if value >= 0 else ""
    return f"{prefix}{round(value, 1)}"
1002
+
1003
+
1004
class PredictorBackend:
    """Prediction backend for one commune.

    Loads the election history from PostgreSQL when available (falling back
    to local files), pre-computes participation statistics and default
    turnout/blancs/nuls rates, and loads the trained model used to predict
    per-bureau vote distributions.
    """

    def __init__(self, commune_code: str = COMMUNE_CODE_SETE):
        """Initialize the backend for ``commune_code`` (DB first, files fallback)."""
        self.commune_code = commune_code
        self.backend = "local"
        try:
            self.history = load_history_from_db(commune_code)
            self.backend = "postgres"
            LOGGER.info("Backend PostgreSQL chargé (%s lignes)", len(self.history))
        except Exception as exc:
            # Any DB failure (connection, empty result) falls back to files.
            LOGGER.warning("PostgreSQL indisponible (%s) -> fallback fichiers.", exc)
            self.history = load_history_from_files(commune_code)
            self.backend = "files"
            LOGGER.info("Backend fichiers chargé (%s lignes)", len(self.history))
        self.history = clean_history_frame(self.history)
        self.event_stats = load_bureau_event_stats(commune_code)
        self.commune_stats = load_commune_event_stats(commune_code)
        # Median fallback rates, overall and per election type.
        self.default_rates = {}
        self.default_rates_by_type: dict[str, dict[str, float]] = {}
        # Prefer commune-level stats; bureau-level stats otherwise.
        stats = self.commune_stats if not self.commune_stats.empty else self.event_stats
        if not stats.empty:
            # Rates are calibrated on first rounds when available.
            if "round" in stats.columns:
                round1 = stats[stats["round"] == 1]
                if not round1.empty:
                    stats = round1
            self.default_rates = {
                "turnout_pct": float(stats["turnout_pct"].median(skipna=True)),
                "blancs_pct": float(stats["blancs_pct"].median(skipna=True)),
                "nuls_pct": float(stats["nuls_pct"].median(skipna=True)),
            }
            if "election_type" in stats.columns:
                for etype, group in stats.groupby("election_type"):
                    self.default_rates_by_type[str(etype)] = {
                        "turnout_pct": float(group["turnout_pct"].median(skipna=True)),
                        "blancs_pct": float(group["blancs_pct"].median(skipna=True)),
                        "nuls_pct": float(group["nuls_pct"].median(skipna=True)),
                    }
        self.model_path = load_model()
        self.model = joblib.load(self.model_path)
        # feature cache per target
        self.refs_cache: Dict[Tuple[str, int], Dict[str, Dict[Tuple[str, str], float]]] = {}

    def available_bureaux(self) -> list[str]:
        """Return the sorted list of bureau codes present in the history."""
        return sorted(self.history["code_bv"].dropna().unique().tolist())

    def available_targets(self) -> list[Tuple[str, int]]:
        """Return (type, year) targets: every historical event plus defaults."""
        existing = set()
        for row in self.history.itertuples(index=False):
            try:
                year = int(row.election_year)  # type: ignore
            except Exception:
                continue
            existing.add((row.election_type, year))
        for t in DEFAULT_TARGETS:
            existing.add(t)
        # Sorted chronologically, then alphabetically within a year.
        return sorted(existing, key=lambda x: (x[1], x[0]))

    def _get_features_and_refs(self, target_type: str, target_year: int) -> Tuple[pd.DataFrame, Dict[str, Dict[Tuple[str, str], float]]]:
        """Return (features, references) for a target, memoized in FEATURE_CACHE."""
        key = (target_type, target_year)
        if key not in FEATURE_CACHE:
            feature_df = build_features_from_history(self.history, target_type, target_year)
            refs = references_from_history(self.history, target_year)
            FEATURE_CACHE[key] = (feature_df, refs)
        return FEATURE_CACHE[key]

    def predict_bureau_details(
        self,
        code_bv: str,
        target_type: str,
        target_year: int,
        inscrits_override: float | None = None,
    ) -> Tuple[Dict[str, object] | None, str, str]:
        """Predict the vote distribution for one bureau.

        Returns ``(details, backend_label, meta)`` where ``details`` holds
        predicted shares, integer counts (including blancs/nuls/abstention)
        and totals — or ``(None, <error message>, "")`` when prediction is
        impossible (no features, unknown bureau, no registered-voter count).
        """
        feature_df, _ = self._get_features_and_refs(target_type, target_year)
        if feature_df.empty:
            return None, "Données insuffisantes", ""
        row = feature_df[feature_df["code_bv"] == code_bv].copy()
        if row.empty:
            return None, "Bureau non trouvé dans l'historique.", ""

        feature_cols = load_feature_columns(FEATURE_COLS_PATH, feature_df)
        # Columns the model expects but the features lack are filled with NaN.
        missing = [c for c in feature_cols if c not in row.columns]
        for col in missing:
            row[col] = np.nan
        preds = self.model.predict(row[feature_cols])
        # Clip to [0, 1] and renormalize so shares sum to 1 per row.
        preds = np.clip(preds, 0, 1)
        sums = preds.sum(axis=1, keepdims=True)
        sums[sums == 0] = 1
        preds = preds / sums
        preds_share = preds.flatten()

        # Model output order matches CANDIDATE_CATEGORIES.
        preds_by_cat = {cat: float(preds_share[idx]) for idx, cat in enumerate(CANDIDATE_CATEGORIES)}
        ordered = ordered_categories()
        share_vec = np.array([preds_by_cat.get(cat, 0.0) for cat in ordered], dtype=float)

        # Resolve the registered-voter count: override first, else the most
        # recent observed value for this bureau.
        stats = self.event_stats[self.event_stats["code_bv"] == code_bv].sort_values("date_scrutin")
        inscrits_used = None
        if inscrits_override is not None:
            try:
                value = float(inscrits_override)
                if value > 0:
                    inscrits_used = value
            except (TypeError, ValueError):
                inscrits_used = None
        if inscrits_used is None and not stats.empty:
            serie = pd.to_numeric(stats["inscrits"], errors="coerce").dropna()
            if not serie.empty:
                inscrits_used = float(serie.iloc[-1])
        if inscrits_used is None:
            return None, "Inscrits indisponibles pour ce bureau.", ""

        def pick_rate(col: str) -> float:
            # Resolve a projected rate for `col`, trying in order:
            # bureau-level same-type stats, commune-level stats, then any
            # bureau-level stats, falling back to per-type/global medians.
            default = self.default_rates.get(col, 0.0)
            default = 0.0 if default is None or np.isnan(default) else float(default)
            type_default = self.default_rates_by_type.get(target_type, {}).get(col)
            if type_default is None or np.isnan(type_default):
                type_default = default

            bureau_scoped = self.event_stats
            # Rates are projected from first rounds when available.
            if not bureau_scoped.empty and "round" in bureau_scoped.columns:
                round1 = bureau_scoped[bureau_scoped["round"] == 1]
                if not round1.empty:
                    bureau_scoped = round1

            series = None
            years = None
            if (
                not bureau_scoped.empty
                and col in bureau_scoped.columns
                and "election_type" in bureau_scoped.columns
            ):
                if target_type in bureau_scoped["election_type"].values:
                    mask = bureau_scoped["election_type"] == target_type
                    series = bureau_scoped.loc[mask, col]
                    years = bureau_scoped.loc[mask, "election_year"]

            if series is None and not self.commune_stats.empty and col in self.commune_stats.columns:
                commune_scoped = self.commune_stats
                if "round" in commune_scoped.columns:
                    round1 = commune_scoped[commune_scoped["round"] == 1]
                    if not round1.empty:
                        commune_scoped = round1
                if target_type in commune_scoped["election_type"].values:
                    mask = commune_scoped["election_type"] == target_type
                    series = commune_scoped.loc[mask, col]
                    years = commune_scoped.loc[mask, "election_year"]
                else:
                    series = commune_scoped[col]
                    years = commune_scoped["election_year"]

            if series is None:
                if bureau_scoped.empty or col not in bureau_scoped.columns:
                    return type_default
                series = bureau_scoped[col]
                years = bureau_scoped["election_year"]

            rate = _project_rate(series, years, target_year)
            if rate is None or np.isnan(rate):
                return type_default
            return float(rate)

        turnout_rate = pick_rate("turnout_pct")
        blancs_rate = pick_rate("blancs_pct")
        nuls_rate = pick_rate("nuls_pct")
        # Blancs + nuls cannot exceed turnout; rescale proportionally if so.
        if blancs_rate + nuls_rate > turnout_rate and (blancs_rate + nuls_rate) > 0:
            scale = turnout_rate / (blancs_rate + nuls_rate)
            blancs_rate *= scale
            nuls_rate *= scale

        inscrits_total = int(round(inscrits_used))
        votants_total = int(round(inscrits_total * turnout_rate))
        blancs_total = int(round(inscrits_total * blancs_rate))
        nuls_total = int(round(inscrits_total * nuls_rate))
        # Re-apply the constraint on integer counts (rounding can break it).
        if blancs_total + nuls_total > votants_total and (blancs_total + nuls_total) > 0:
            scale = votants_total / (blancs_total + nuls_total)
            blancs_total = int(round(blancs_total * scale))
            nuls_total = int(round(nuls_total * scale))
        exprimes_total = max(0, votants_total - blancs_total - nuls_total)
        abstention_total = max(0, inscrits_total - votants_total)

        # Distribute the expressed votes across categories by predicted share.
        bloc_counts = _allocate_counts(share_vec, exprimes_total)
        counts_by_cat = {cat: int(count) for cat, count in zip(ordered, bloc_counts)}
        counts_by_cat.update(
            {
                "blancs": int(blancs_total),
                "nuls": int(nuls_total),
                "abstention": int(abstention_total),
            }
        )
        backend_label = format_backend_label(self.backend)
        meta = (
            f"Inscrits utilisés : {inscrits_total} | Votants : {votants_total} | "
            f"Blancs : {blancs_total} | Nuls : {nuls_total} | Abstentions : {abstention_total}"
        )
        details = {
            "shares_by_cat": preds_by_cat,
            "share_vec": share_vec,
            "ordered": ordered,
            "counts": counts_by_cat,
            "totals": {
                "inscrits": inscrits_total,
                "votants": votants_total,
                "blancs": blancs_total,
                "nuls": nuls_total,
                "abstention": abstention_total,
                "exprimes": exprimes_total,
            },
        }
        return details, backend_label, meta

    def predict_bureau(
        self,
        code_bv: str,
        target_type: str,
        target_year: int,
        inscrits_override: float | None = None,
    ) -> Tuple[pd.DataFrame, str, str]:
        """Tabular wrapper around ``predict_bureau_details``.

        Returns a (categorie, nombre) DataFrame including blancs, nuls and
        abstention rows, plus the backend label and the meta string; an
        empty DataFrame signals a prediction failure.
        """
        details, backend_label, meta = self.predict_bureau_details(
            code_bv,
            target_type,
            target_year,
            inscrits_override,
        )
        if details is None:
            return pd.DataFrame(), backend_label, ""
        counts_by_cat = details["counts"]
        ordered = details["ordered"]
        rows = []
        for cat in ordered:
            rows.append(
                {
                    "categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat),
                    "nombre": int(counts_by_cat.get(cat, 0)),
                }
            )
        for extra in ["blancs", "nuls", "abstention"]:
            rows.append(
                {
                    "categorie": DISPLAY_CATEGORY_LABELS[extra],
                    "nombre": int(counts_by_cat.get(extra, 0)),
                }
            )
        return pd.DataFrame(rows), backend_label, meta
1245
+
1246
+
1247
+ def build_bar_chart(
1248
+ df: pd.DataFrame,
1249
+ value_col: str,
1250
+ *,
1251
+ color: str = "#3b82f6",
1252
+ color_map: Dict[str, str] | None = None,
1253
+ category_col: str = "categorie",
1254
+ ylabel: str = "Score (%)",
1255
+ ):
1256
+ try:
1257
+ import matplotlib.pyplot as plt
1258
+ except Exception:
1259
+ return None
1260
+ if df.empty or value_col not in df.columns:
1261
+ return None
1262
+ plt.figure(figsize=(6, 3))
1263
+ labels = df[category_col].astype(str).tolist() if category_col in df.columns else []
1264
+ if color_map:
1265
+ colors = [color_map.get(label, color) for label in labels]
1266
+ else:
1267
+ colors = color
1268
+ plt.bar(labels, df[value_col], color=colors)
1269
+ plt.xticks(rotation=30, ha="right")
1270
+ plt.ylabel(ylabel)
1271
+ plt.tight_layout()
1272
+ return plt
1273
+
1274
+
1275
+ def create_interface() -> gr.Blocks:
1276
+ backend = PredictorBackend()
1277
+ bureau_choices = build_bureau_choices(backend.history)
1278
+ bureau_labels = [label for label, _ in bureau_choices]
1279
+ bureau_map = {label: value for label, value in bureau_choices}
1280
+ bureau_label_by_code = {value: label for label, value in bureau_choices}
1281
+ targets = backend.available_targets()
1282
+ target_labels = [f"{t} {y}" for t, y in targets]
1283
+ history_choices = build_history_choices(backend.history)
1284
+ history_labels = [label for label, _ in history_choices]
1285
+ history_map = {label: value for label, value in history_choices}
1286
+ if ("municipales", 2026) in targets:
1287
+ default_target = "municipales 2026"
1288
+ elif targets:
1289
+ default_target = f"{targets[-1][0]} {targets[-1][1]}"
1290
+ else:
1291
+ default_target = "municipales 2026"
1292
+ default_bv = bureau_labels[0] if bureau_labels else None
1293
+ default_history = history_labels[-1] if history_labels else None
1294
+ backend_label = format_backend_label(backend.backend)
1295
+ residual_payload = load_residual_intervals()
1296
+ residuals = residual_payload.get("residuals", {}) if isinstance(residual_payload, dict) else {}
1297
+ residual_model = residual_payload.get("model", "inconnu") if isinstance(residual_payload, dict) else "inconnu"
1298
+ interval_choices = list(INTERVAL_BANDS.keys()) or ["80% (p10-p90)"]
1299
+ interval_default = interval_choices[0]
1300
+ bloc_labels = [DISPLAY_CATEGORY_LABELS.get(cat, cat) for cat in ordered_categories()]
1301
+
1302
+ with gr.Blocks(title="Prévision Municipales — Ville de Sète") as demo:
1303
+ gr.Markdown(
1304
+ """
1305
+ # Prévision Municipales — Ville de Sète
1306
+ Choisissez un bureau de vote et une élection cible.
1307
+ Le modèle estime un volume par catégorie politique, ainsi que les abstentions, blancs et nuls.
1308
+ """
1309
+ )
1310
+ with gr.Tabs():
1311
+ with gr.Tab("Prévisions"):
1312
+ with gr.Row():
1313
+ bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
1314
+ target_dd = gr.Dropdown(choices=target_labels, value=default_target, label="Élection cible (type année)")
1315
+ inscrits_in = gr.Number(value=None, label="Inscrits (optionnel)", precision=0)
1316
+ predict_btn = gr.Button("Prédire")
1317
+ source_box = gr.Markdown(value=f"Source des données : {backend_label}")
1318
+ output_df = gr.Dataframe(
1319
+ headers=PREDICTION_OUTPUT_COLUMNS,
1320
+ label="Prédictions (nombres)",
1321
+ )
1322
+ chart = gr.Plot()
1323
+
1324
+ with gr.Tab("Historique"):
1325
+ gr.Markdown(
1326
+ """
1327
+ Consultation des résultats passés (sans machine learning).
1328
+ Sélectionnez un bureau et une élection pour afficher l'histogramme des parts par tendance politique.
1329
+ """
1330
+ )
1331
+ with gr.Row():
1332
+ history_bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
1333
+ history_election_dd = gr.Dropdown(
1334
+ choices=history_labels,
1335
+ value=default_history,
1336
+ label="Élection (type année tour)",
1337
+ )
1338
+ history_btn = gr.Button("Afficher l'historique")
1339
+ history_source = gr.Markdown(value=f"Source des données : {backend_label}")
1340
+ history_df = gr.Dataframe(headers=HISTORY_OUTPUT_COLUMNS, label="Résultats historiques")
1341
+ history_chart = gr.Plot()
1342
+ history_meta = gr.Markdown()
1343
+
1344
+ with gr.Tab("Carte"):
1345
+ gr.Markdown(
1346
+ """
1347
+ Carte des bureaux de vote de Sète.
1348
+ Cliquez sur un polygone pour afficher la prédiction (table + graphique).
1349
+ """
1350
+ )
1351
+ map_legend = gr.HTML(value=build_map_legend_html())
1352
+ with gr.Row():
1353
+ map_target_dd = gr.Dropdown(
1354
+ choices=target_labels,
1355
+ value=default_target,
1356
+ label="Élection cible (type année)",
1357
+ )
1358
+ map_btn = gr.Button("Afficher la carte")
1359
+ map_html = gr.HTML(value="<p>Cliquez sur 'Afficher la carte' pour charger la carte.</p>")
1360
+
1361
+ with gr.Tab("Stratégie"):
1362
+ gr.Markdown(
1363
+ """
1364
+ Analyse stratégique par bureau : intervalles d'incertitude issus des résidus CV,
1365
+ puis simulateur de transferts pour estimer des bascules potentielles.
1366
+ """
1367
+ )
1368
+ with gr.Row():
1369
+ strategy_bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
1370
+ strategy_target_dd = gr.Dropdown(
1371
+ choices=target_labels,
1372
+ value=default_target,
1373
+ label="Élection cible (type année)",
1374
+ )
1375
+ strategy_inscrits_in = gr.Number(value=None, label="Inscrits (optionnel)", precision=0)
1376
+ interval_dd = gr.Dropdown(
1377
+ choices=interval_choices,
1378
+ value=interval_default,
1379
+ label="Intervalle CV",
1380
+ )
1381
+ strategy_btn = gr.Button("Analyser l'incertitude")
1382
+ interval_source = gr.Markdown(
1383
+ value=(
1384
+ f"Intervalle CV basé sur le modèle : {residual_model}"
1385
+ if residuals
1386
+ else "Intervalle CV indisponible (fallback ±3%)."
1387
+ )
1388
+ )
1389
+ interval_df = gr.Dataframe(
1390
+ headers=INTERVAL_OUTPUT_COLUMNS,
1391
+ label="Plage empirique par bloc",
1392
+ )
1393
+ interval_chart = gr.Plot()
1394
+
1395
+ gr.Markdown("### Simulateur de transferts (points d'inscrits)")
1396
+ with gr.Row():
1397
+ target_bloc_dd = gr.Dropdown(choices=bloc_labels, value=bloc_labels[0] if bloc_labels else None, label="Bloc cible")
1398
+ with gr.Row():
1399
+ source_1_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["abstention"], label="Source 1")
1400
+ target_1_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["droite_dure"], label="Cible 1")
1401
+ delta_1 = gr.Slider(minimum=0, maximum=10, value=3, step=0.1, label="Delta 1 (points %)")
1402
+ with gr.Row():
1403
+ source_2_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["droite_modere"], label="Source 2")
1404
+ target_2_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["gauche_modere"], label="Cible 2")
1405
+ delta_2 = gr.Slider(minimum=0, maximum=10, value=3, step=0.1, label="Delta 2 (points %)")
1406
+ simulate_btn = gr.Button("Simuler les transferts")
1407
+ sim_df = gr.Dataframe(headers=SIM_OUTPUT_COLUMNS, label="Simulation par catégorie")
1408
+ sim_chart = gr.Plot()
1409
+ opportunity_df = gr.Dataframe(headers=OPPORTUNITY_OUTPUT_COLUMNS, label="Bureaux à potentiel (trié)")
1410
+
1411
+ def _predict(bv_label: str, target_label: str, inscrits_override: float | None):
1412
+ if not bv_label or not target_label:
1413
+ return pd.DataFrame(), "Entrée invalide", None
1414
+ code_bv = bureau_map.get(bv_label)
1415
+ if not code_bv:
1416
+ return pd.DataFrame(), "Bureau invalide", None
1417
+ try:
1418
+ parts = target_label.split()
1419
+ target_type, target_year = parts[0].lower(), int(parts[1])
1420
+ except Exception:
1421
+ target_type, target_year = "municipales", 2026
1422
+ df, backend_label, meta = backend.predict_bureau(code_bv, target_type, target_year, inscrits_override)
1423
+ plot = build_bar_chart(
1424
+ df,
1425
+ value_col="nombre",
1426
+ ylabel="Nombre d'électeurs",
1427
+ color_map=DISPLAY_LABEL_COLORS,
1428
+ )
1429
+ meta_label = f" | {meta}" if meta else ""
1430
+ return df, f"Source des données : {backend_label}{meta_label}", plot
1431
+
1432
+ def _parse_target_label(target_label: str) -> Tuple[str, int]:
1433
+ try:
1434
+ parts = target_label.split()
1435
+ return parts[0].lower(), int(parts[1])
1436
+ except Exception:
1437
+ return "municipales", 2026
1438
+
1439
+ def _map(target_label: str):
1440
+ if not target_label:
1441
+ return "<p>Élection invalide.</p>"
1442
+ target_type, target_year = _parse_target_label(target_label)
1443
+ return build_bureau_map_html(backend, target_type, target_year)
1444
+
1445
def _history(bv_label: str, election_label: str):
    """Look up past results for one bureau and one election.

    Returns (table, source markdown, plot | None, meta markdown). Invalid
    selections return an empty table and an error message.
    """
    def _invalid(message: str):
        # Shared shape for every rejection path: empty table, no plot, no meta.
        return pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS), message, None, ""

    if not bv_label or not election_label:
        return _invalid("Entrée invalide")
    code_bv = bureau_map.get(bv_label)
    if not code_bv:
        return _invalid("Bureau invalide")
    election_key = history_map.get(election_label)
    if not election_key:
        return _invalid("Élection invalide")
    try:
        election_type, election_year, round_num = parse_election_key(election_key)
    except Exception:
        return _invalid("Élection invalide")

    # Restrict the full history to the requested bureau/election/round.
    mask = (
        (backend.history["code_bv"] == code_bv)
        & (backend.history["election_type"] == election_type)
        & (backend.history["election_year"] == election_year)
        & (backend.history["round"] == round_num)
    )
    history_slice = backend.history[mask].copy()
    if history_slice.empty:
        return (
            pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS),
            f"Source des données : {backend_label}",
            None,
            "Aucun résultat pour ce bureau.",
        )

    table = prepare_history_table(history_slice)
    plot = build_bar_chart(
        table,
        value_col="score_%",
        ylabel="Score (%)",
        color_map=DISPLAY_LABEL_COLORS,
    )
    meta = format_history_meta(history_slice)
    return table, f"Source des données : {backend_label}", plot, meta
1480
+
1481
def _strategy_interval(
    bv_label: str,
    target_label: str,
    inscrits_override: float | None,
    band_label: str,
):
    """Compute CV-residual uncertainty intervals per bloc for one bureau.

    Returns (interval table, source markdown, plot | None); invalid inputs or
    missing backend details produce an empty table.
    """
    no_result = pd.DataFrame(columns=INTERVAL_OUTPUT_COLUMNS)
    if not bv_label or not target_label:
        return no_result, "Entrée invalide", None
    code_bv = bureau_map.get(bv_label)
    if not code_bv:
        return no_result, "Bureau invalide", None

    target_type, target_year = _parse_target_label(target_label)
    details, backend_label_local, _ = backend.predict_bureau_details(
        code_bv,
        target_type,
        target_year,
        inscrits_override,
    )
    if details is None:
        return no_result, backend_label_local, None

    exprimes_total = int(details["totals"].get("exprimes", 0))
    table = build_interval_table(
        details["shares_by_cat"],
        exprimes_total,
        residuals,
        band_label,
    )
    plot = build_interval_chart(table, color_map=DISPLAY_LABEL_COLORS)
    if residuals:
        source = f"Intervalle CV ({band_label}) basé sur le modèle : {residual_model}"
    else:
        source = "Intervalle CV indisponible (fallback ±3%)."
    return table, source, plot
1517
+
1518
def _strategy_simulate(
    bv_label: str,
    target_label: str,
    inscrits_override: float | None,
    bloc_cible_label: str,
    source_1: str,
    target_1: str,
    delta_1_val: float,
    source_2: str,
    target_2: str,
    delta_2_val: float,
):
    """Simulate up to two vote transfers and rank bureaux by potential gain.

    Returns (simulation table, plot | None, opportunity table). Invalid
    inputs or missing backend details yield empty tables and no plot.
    Each transfer is (source bloc, destination bloc, delta in points of
    registered voters); deltas <= 0 or unknown bloc labels are ignored.
    """
    empty_sim = pd.DataFrame(columns=SIM_OUTPUT_COLUMNS)
    empty_oppo = pd.DataFrame(columns=OPPORTUNITY_OUTPUT_COLUMNS)
    if not bv_label or not target_label:
        return empty_sim, None, empty_oppo
    code_bv = bureau_map.get(bv_label)
    if not code_bv:
        return empty_sim, None, empty_oppo
    target_type, target_year = _parse_target_label(target_label)
    details, _, _ = backend.predict_bureau_details(
        code_bv,
        target_type,
        target_year,
        inscrits_override,
    )
    if details is None:
        return empty_sim, None, empty_oppo

    # Collect the valid transfers: both bloc labels must resolve to keys and
    # the delta must be strictly positive.
    transfers = []
    for src_label, dst_label, delta in [
        (source_1, target_1, delta_1_val),
        (source_2, target_2, delta_2_val),
    ]:
        src_key = CATEGORY_LABEL_TO_KEY.get(src_label)
        dst_key = CATEGORY_LABEL_TO_KEY.get(dst_label)
        if src_key and dst_key and delta and delta > 0:
            transfers.append((src_key, dst_key, float(delta)))

    # Apply the transfers to the selected bureau and build the before/after view.
    counts = details["counts"]
    totals = details["totals"]
    inscrits_total = int(totals.get("inscrits", 0))
    updated = apply_transfers(counts, inscrits_total, transfers)
    sim_table = build_simulation_table(counts, updated)
    sim_plot = build_bar_chart(
        sim_table,
        value_col="apres_transfert",
        ylabel="Nombre d'électeurs",
        color_map=DISPLAY_LABEL_COLORS,
    )

    # Opportunity scan: replay the same transfers on every bureau and measure
    # the gain for the chosen target bloc. Only runs for a known bloc key.
    target_bloc = CATEGORY_LABEL_TO_KEY.get(bloc_cible_label, bloc_cible_label)
    opp_rows = []
    if target_bloc in ordered_categories():
        for bv_code in backend.available_bureaux():
            # The manual "inscrits" override only applies to the bureau the
            # user actually selected; the others use backend defaults.
            override = inscrits_override if bv_code == code_bv else None
            bv_details, _, _ = backend.predict_bureau_details(
                bv_code,
                target_type,
                target_year,
                override,
            )
            if bv_details is None:
                continue
            base_counts = bv_details["counts"]
            bv_totals = bv_details["totals"]
            bv_inscrits = int(bv_totals.get("inscrits", 0))
            updated_counts = apply_transfers(base_counts, bv_inscrits, transfers)
            bloc_counts = {cat: int(base_counts.get(cat, 0)) for cat in ordered_categories()}
            updated_blocs = {cat: int(updated_counts.get(cat, 0)) for cat in ordered_categories()}
            # Leading bloc before/after the transfers (first max wins on ties).
            top_base = max(bloc_counts, key=bloc_counts.get) if bloc_counts else None
            top_after = max(updated_blocs, key=updated_blocs.get) if updated_blocs else None
            gain = int(updated_counts.get(target_bloc, 0) - base_counts.get(target_bloc, 0))
            opp_rows.append(
                {
                    "bureau": bureau_label_by_code.get(bv_code, bv_code),
                    "gain_cible": gain,
                    "score_base": int(base_counts.get(target_bloc, 0)),
                    "score_apres": int(updated_counts.get(target_bloc, 0)),
                    "top_base": DISPLAY_CATEGORY_LABELS.get(top_base, top_base),
                    "top_apres": DISPLAY_CATEGORY_LABELS.get(top_after, top_after),
                    # "bascule" = the transfers flip this bureau to the target bloc.
                    "bascule": "oui" if top_base != target_bloc and top_after == target_bloc else "non",
                }
            )
    opp_df = pd.DataFrame(opp_rows, columns=OPPORTUNITY_OUTPUT_COLUMNS)
    if not opp_df.empty:
        # Flipped bureaux first ("oui" > "non" lexicographically), then by gain.
        opp_df = opp_df.sort_values(["bascule", "gain_cible"], ascending=[False, False])
    return sim_table, sim_plot, opp_df
1606
+
1607
+ predict_btn.click(_predict, inputs=[bureau_dd, target_dd, inscrits_in], outputs=[output_df, source_box, chart])
1608
+ history_btn.click(
1609
+ _history,
1610
+ inputs=[history_bureau_dd, history_election_dd],
1611
+ outputs=[history_df, history_source, history_chart, history_meta],
1612
+ )
1613
+ map_btn.click(
1614
+ _map,
1615
+ inputs=[map_target_dd],
1616
+ outputs=[map_html],
1617
+ )
1618
+ strategy_btn.click(
1619
+ _strategy_interval,
1620
+ inputs=[strategy_bureau_dd, strategy_target_dd, strategy_inscrits_in, interval_dd],
1621
+ outputs=[interval_df, interval_source, interval_chart],
1622
+ )
1623
+ simulate_btn.click(
1624
+ _strategy_simulate,
1625
+ inputs=[
1626
+ strategy_bureau_dd,
1627
+ strategy_target_dd,
1628
+ strategy_inscrits_in,
1629
+ target_bloc_dd,
1630
+ source_1_dd,
1631
+ target_1_dd,
1632
+ delta_1,
1633
+ source_2_dd,
1634
+ target_2_dd,
1635
+ delta_2,
1636
+ ],
1637
+ outputs=[sim_df, sim_chart, opportunity_df],
1638
+ )
1639
+ return demo
1640
+
1641
+
1642
if __name__ == "__main__":
    # Script entry point: configure basic console logging, build the Gradio
    # app and serve it. 0.0.0.0 exposes the server on all interfaces
    # (Docker / HF Spaces); 7860 is Gradio's conventional port.
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    demo = create_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860)
config/communes.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ communes:
2
+ # - code_insee: "34003"
3
+ # nom: "Agde"
4
+ # - code_insee: "34101"
5
+ # nom: "Florensac"
6
+ # - code_insee: "34199"
7
+ # nom: "Pezenas"
8
+ # - code_insee: "34300"
9
+ # nom: "Servian"
10
+ - code_insee: "34301"
11
+ nom: "Sete"
config/nuances.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Mapping des nuances politiques vers les blocs.
2
+ #
3
+ # - base_mapping: chemin vers le CSV historique (optionnel).
4
+ # - overrides: liste d'ajouts/surcharges pour des nuances absentes ou nouvelles.
5
+ # - mapping: mapping complet si vous ne voulez pas utiliser base_mapping.
6
+
7
+ base_mapping: data/mapping_candidats_blocs.csv
8
+
9
+ # Exemple d'ajout/surcharge :
10
+ # overrides:
11
+ # - code_candidature: "XYZ"
12
+ # nom_candidature: "Exemple de nuance"
13
+ # blocs: [gauche_modere, centre]
14
+ overrides: []
config/raw_sources.yaml ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 14_EU.csv:
2
+ code_bv_cols:
3
+ - Code de la commune
4
+ - N° de bureau de vote
5
+ date_scrutin: '2014-05-25'
6
+ rename_map:
7
+ Code nuance du candidat: code_candidature
8
+ Exprimés: exprimes
10
+ Inscrits: inscrits
11
+ Nom du candidat: nom_candidature
12
+ Nombre de voix du candidat: voix
13
+ Prénom du candidat: nom_candidature
14
+ Voix: voix
15
+ Votants: votants
16
+ tour_column: N° tour
17
+ type_scrutin: europeennes
18
+ 14_MN14_T1T2.csv:
19
+ code_bv_cols:
20
+ - Code commune
21
+ - N° de bureau de vote
22
+ date_scrutin: '2014-03-23'
23
+ rename_map:
24
+ Code nuance de la liste: code_candidature
25
+ Exprimés: exprimes
26
+ Inscrits: inscrits
27
+ Nom du candidat tête de liste: nom_candidature
28
+ Nombre de voix: voix
29
+ Prénom du candidat tête de liste: nom_candidature
30
+ Votants: votants
31
+ tour_column: N° tour
32
+ type_scrutin: municipales
33
+ 17_L_T1.csv:
34
+ code_bv_cols:
35
+ - Code de la commune
36
+ - Code du b.vote
37
+ date_scrutin: '2017-06-11'
38
+ rename_map:
39
+ Abstentions: abstentions
40
+ Blancs: blancs
41
+ Exprimés: exprimes
42
+ Inscrits: inscrits
43
+ Nom: nom_candidature
44
+ Nuance: code_candidature
45
+ Nuls: nuls
46
+ Voix: voix
47
+ Votants: votants
48
+ tour: 1
49
+ type_scrutin: legislatives
50
+ 17_L_T2.csv:
51
+ code_bv_cols:
52
+ - Code de la commune
53
+ - Code du b.vote
54
+ date_scrutin: '2017-06-18'
55
+ rename_map:
56
+ Abstentions: abstentions
57
+ Blancs: blancs
58
+ Exprimés: exprimes
59
+ Inscrits: inscrits
60
+ Nom: nom_candidature
61
+ Nuance: code_candidature
62
+ Nuls: nuls
63
+ Voix: voix
64
+ Votants: votants
65
+ tour: 2
66
+ type_scrutin: legislatives
67
+ 17_PR_T1.csv:
68
+ code_bv_cols:
69
+ - Code de la commune
70
+ - Code du b.vote
71
+ date_scrutin: '2017-04-23'
72
+ rename_map:
73
+ Abstentions: abstentions
74
+ Blancs: blancs
75
+ Code nuance du candidat: code_candidature
76
+ Exprimés: exprimes
77
+ Inscrits: inscrits
78
+ Nom: nom_candidature
79
+ Nuls: nuls
80
+ Voix: voix
81
+ Votants: votants
82
+ tour: 1
83
+ type_scrutin: presidentielles
84
+ 17_PR_T2.csv:
85
+ code_bv_cols:
86
+ - Code de la commune
87
+ - Code du b.vote
88
+ date_scrutin: '2017-05-07'
89
+ rename_map:
90
+ Abstentions: abstentions
91
+ Blancs: blancs
92
+ Code nuance du candidat: code_candidature
93
+ Exprimés: exprimes
94
+ Inscrits: inscrits
95
+ Nom: nom_candidature
96
+ Nuls: nuls
97
+ Voix: voix
98
+ Votants: votants
99
+ tour: 2
100
+ type_scrutin: presidentielles
101
+ 19_EU.csv:
102
+ code_bv_cols:
103
+ - Code de la commune
104
+ - Code du b.vote
105
+ date_scrutin: '2019-05-26'
106
+ rename_map:
107
+ Abstentions: abstentions
108
+ Blancs: blancs
109
+ Exprimés: exprimes
110
+ Inscrits: inscrits
111
+ Nom Tête de Liste: nom_candidature
112
+ Nuance Liste: code_candidature
113
+ Nuls: nuls
114
+ Voix: voix
115
+ Votants: votants
116
+ tour: 1
117
+ type_scrutin: europeennes
118
+ 20_MN_T1.csv:
119
+ code_bv_cols:
120
+ - Code de la commune
121
+ - Code B.Vote
122
+ date_scrutin: '2020-03-15'
123
+ rename_map:
124
+ Abstentions: abstentions
125
+ Blancs: blancs
126
+ Code Nuance: code_candidature
127
+ Exprimés: exprimes
128
+ Inscrits: inscrits
129
+ Liste: nom_candidature
130
+ Nom: nom_candidature
131
+ Nuls: nuls
132
+ Voix: voix
133
+ Votants: votants
134
+ sep: ;
135
+ tour: 1
136
+ type_scrutin: municipales
137
+ 20_MN_T2.csv:
138
+ code_bv_cols:
139
+ - Code de la commune
140
+ - Code B.Vote
141
+ date_scrutin: '2020-06-28'
142
+ rename_map:
143
+ Abstentions: abstentions
144
+ Blancs: blancs
145
+ Code Nuance: code_candidature
146
+ Exprimés: exprimes
147
+ Inscrits: inscrits
148
+ Liste: nom_candidature
149
+ Nom: nom_candidature
150
+ Nuls: nuls
151
+ Voix: voix
152
+ Votants: votants
153
+ tour: 2
154
+ type_scrutin: municipales
155
+ 21_DEP_T1.csv:
156
+ code_bv_cols:
157
+ - Code de la commune
158
+ - Code du b.vote
159
+ date_scrutin: '2021-06-20'
160
+ rename_map:
161
+ Abstentions: abstentions
162
+ Binôme: nom_candidature
163
+ Blancs: blancs
164
+ Exprimés: exprimes
165
+ Inscrits: inscrits
166
+ Nuance: code_candidature
167
+ Nuls: nuls
168
+ Voix: voix
169
+ Votants: votants
170
+ tour: 1
171
+ type_scrutin: departementales
172
+ 21_DEP_T2.csv:
173
+ code_bv_cols:
174
+ - Code de la commune
175
+ - Code du b.vote
176
+ date_scrutin: '2021-06-27'
177
+ rename_map:
178
+ Abstentions: abstentions
179
+ Binôme: nom_candidature
180
+ Blancs: blancs
181
+ Exprimés: exprimes
182
+ Inscrits: inscrits
183
+ Nuance: code_candidature
184
+ Nuls: nuls
185
+ Voix: voix
186
+ Votants: votants
187
+ tour: 2
188
+ type_scrutin: departementales
189
+ 21_REG_T1.csv:
190
+ code_bv_cols:
191
+ - Code de la commune
192
+ - Code du b.vote
193
+ date_scrutin: '2021-06-20'
194
+ rename_map:
195
+ Abstentions: abstentions
196
+ Blancs: blancs
197
+ Exprimés: exprimes
198
+ Inscrits: inscrits
199
+ Libellé Abrégé Liste: nom_candidature
200
+ Nuance Liste: code_candidature
201
+ Nuls: nuls
202
+ Voix: voix
203
+ Votants: votants
204
+ tour: 1
205
+ type_scrutin: regionales
206
+ 21_REG_T2.csv:
207
+ code_bv_cols:
208
+ - Code de la commune
209
+ - Code du b.vote
210
+ date_scrutin: '2021-06-27'
211
+ rename_map:
212
+ Abstentions: abstentions
213
+ Blancs: blancs
214
+ Exprimés: exprimes
215
+ Inscrits: inscrits
216
+ Libellé Abrégé Liste: nom_candidature
217
+ Nuance Liste: code_candidature
218
+ Nuls: nuls
219
+ Voix: voix
220
+ Votants: votants
221
+ tour: 2
222
+ type_scrutin: regionales
223
+ 22_L_T1.csv:
224
+ code_bv_cols:
225
+ - Code de la commune
226
+ - Code du b.vote
227
+ date_scrutin: '2022-06-12'
228
+ rename_map:
229
+ Abstentions: abstentions
230
+ Blancs: blancs
231
+ Exprimés: exprimes
232
+ Inscrits: inscrits
233
+ Nom: nom_candidature
234
+ Nuance: code_candidature
235
+ Nuls: nuls
236
+ Voix: voix
237
+ Votants: votants
238
+ tour: 1
239
+ type_scrutin: legislatives
240
+ 22_L_T2.csv:
241
+ code_bv_cols:
242
+ - Code de la commune
243
+ - Code du b.vote
244
+ date_scrutin: '2022-06-19'
245
+ rename_map:
246
+ Abstentions: abstentions
247
+ Blancs: blancs
248
+ Exprimés: exprimes
249
+ Inscrits: inscrits
250
+ Nom: nom_candidature
251
+ Nuance: code_candidature
252
+ Nuls: nuls
253
+ Voix: voix
254
+ Votants: votants
255
+ tour: 2
256
+ type_scrutin: legislatives
257
+ 22_PR_T1.csv:
258
+ code_bv_cols:
259
+ - Code de la commune
260
+ - Code du b.vote
261
+ date_scrutin: '2022-04-10'
262
+ rename_map:
263
+ Abstentions: abstentions
264
+ Blancs: blancs
265
+ Code nuance du candidat: code_candidature
266
+ Exprimés: exprimes
267
+ Inscrits: inscrits
268
+ Nom: nom_candidature
269
+ Nuls: nuls
270
+ Voix: voix
271
+ Votants: votants
272
+ tour: 1
273
+ type_scrutin: presidentielles
274
+ 22_PR_T2.csv:
275
+ code_bv_cols:
276
+ - Code de la commune
277
+ - Code du b.vote
278
+ date_scrutin: '2022-04-24'
279
+ rename_map:
280
+ Abstentions: abstentions
281
+ Blancs: blancs
282
+ Code nuance du candidat: code_candidature
283
+ Exprimés: exprimes
284
+ Inscrits: inscrits
285
+ Nom: nom_candidature
286
+ Nuls: nuls
287
+ Voix: voix
288
+ Votants: votants
289
+ tour: 2
290
+ type_scrutin: presidentielles
291
+ 24_EU.csv:
292
+ code_bv_cols:
293
+ - Code commune
294
+ - Code BV
295
+ date_scrutin: '2024-06-09'
296
+ rename_map:
297
+ Abstentions: abstentions
298
+ Blancs: blancs
299
+ Exprimés: exprimes
300
+ Inscrits: inscrits
301
+ Libellé abrégé de liste 1: nom_candidature
302
+ Nuance liste 1: code_candidature
303
+ Nuls: nuls
304
+ Voix: voix
305
+ Voix 1: voix
306
+ Votants: votants
307
+ tour: 1
308
+ type_scrutin: europeennes
309
+ 24_L_T1.csv:
310
+ code_bv_cols:
311
+ - Code commune
312
+ - Code BV
313
+ date_scrutin: '2024-06-30'
314
+ rename_map:
315
+ Abstentions: abstentions
316
+ Binôme: nom_candidature
317
+ Blancs: blancs
318
+ Exprimés: exprimes
319
+ Inscrits: inscrits
320
+ Libellé Abrégé Liste: nom_candidature
321
+ Nuance Liste: code_candidature
322
+ Nuls: nuls
323
+ Voix: voix
324
+ Votants: votants
325
+ tour: 1
326
+ type_scrutin: legislatives
327
+ 24_L_T2.csv:
328
+ code_bv_cols:
329
+ - Code commune
330
+ - Code BV
331
+ date_scrutin: '2024-07-07'
332
+ rename_map:
333
+ Abstentions: abstentions
334
+ Binôme: nom_candidature
335
+ Blancs: blancs
336
+ Exprimés: exprimes
337
+ Inscrits: inscrits
338
+ Libellé Abrégé Liste: nom_candidature
339
+ Nuance Liste: code_candidature
340
+ Nuls: nuls
341
+ Voix: voix
342
+ Votants: votants
343
+ tour: 2
344
+ type_scrutin: legislatives
data/geo/bdv_s_te.geojson ADDED
The diff for this file is too large to render. See raw diff
 
data/geo/bdv_s_te.kml ADDED
@@ -0,0 +1,1762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <kml xmlns="http://www.opengis.net/kml/2.2"><Document>
2
+ <Placemark id="wC4Df">
3
+ <name>Bureau 01</name><ExtendedData></ExtendedData>
4
+ <Polygon>
5
+ <outerBoundaryIs>
6
+ <LinearRing><coordinates>3.701684,43.397393
7
+ 3.697436,43.396753
8
+ 3.697017,43.395943
9
+ 3.697007,43.395428
10
+ 3.695612,43.395241
11
+ 3.695934,43.396504
12
+ 3.696427,43.396473
13
+ 3.696578,43.396831
14
+ 3.696213,43.397159
15
+ 3.696964,43.397128
16
+ 3.697157,43.39751
17
+ 3.697157,43.398079
18
+ 3.696975,43.398211
19
+ 3.696985,43.398858
20
+ 3.696771,43.399809
21
+ 3.697093,43.401649
22
+ 3.695955,43.40182
23
+ 3.694453,43.401555
24
+ 3.694775,43.401384
25
+ 3.694743,43.401189
26
+ 3.695129,43.401212
27
+ 3.695118,43.401064
28
+ 3.695505,43.40108
29
+ 3.69573,43.400955
30
+ 3.695419,43.400745
31
+ 3.69543,43.400441
32
+ 3.695033,43.400277
33
+ 3.69514,43.400129
34
+ 3.695312,43.39995
35
+ 3.695129,43.399833
36
+ 3.695397,43.399669
37
+ 3.69514,43.399279
38
+ 3.694839,43.399295
39
+ 3.695022,43.398437
40
+ 3.695033,43.397611
41
+ 3.69529,43.397564
42
+ 3.695322,43.397736
43
+ 3.695805,43.397704
44
+ 3.695065,43.396091
45
+ 3.69411,43.396247
46
+ 3.69293,43.394867
47
+ 3.695312,43.394914
48
+ 3.699818,43.395522
49
+ 3.700311,43.395319
50
+ 3.700504,43.394929
51
+ 3.701942,43.394134
52
+ 3.702006,43.394399
53
+ 3.700676,43.395132
54
+ 3.702328,43.395257
55
+ 3.702822,43.39574
56
+ 3.702672,43.397112
57
+ 3.701684,43.397393</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
58
+ <Placemark id="S9B2Q">
59
+ <name>Bureau 2</name><ExtendedData>
60
+ <Data name="_umap_options"><value>{"color":"MediumOrchid"}</value></Data></ExtendedData>
61
+ <Polygon>
62
+ <outerBoundaryIs>
63
+ <LinearRing><coordinates>3.694561,43.402506
64
+ 3.694389,43.402514
65
+ 3.694282,43.402569
66
+ 3.693911,43.402456
67
+ 3.694072,43.401622
68
+ 3.692425,43.401626
69
+ 3.691535,43.402896
70
+ 3.691181,43.40281
71
+ 3.691063,43.402974
72
+ 3.690355,43.402849
73
+ 3.690001,43.403325
74
+ 3.689657,43.403301
75
+ 3.689352,43.403804
76
+ 3.688483,43.403644
77
+ 3.688343,43.403906
78
+ 3.689228,43.404089
79
+ 3.689132,43.40426
80
+ 3.690258,43.404455
81
+ 3.690054,43.404615
82
+ 3.689856,43.404747
83
+ 3.690081,43.404779
84
+ 3.690382,43.40451
85
+ 3.69072,43.404701
86
+ 3.692297,43.404829
87
+ 3.692393,43.404915
88
+ 3.692688,43.404541
89
+ 3.693901,43.405055
90
+ 3.694416,43.405149
91
+ 3.694142,43.405036
92
+ 3.694287,43.403995
93
+ 3.694561,43.402506</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
94
+ <Placemark id="oC7F2">
95
+ <name>Bureau 3</name><ExtendedData>
96
+ <Data name="_umap_options"><value>{"color":"Chartreuse"}</value></Data></ExtendedData>
97
+ <Polygon>
98
+ <outerBoundaryIs>
99
+ <LinearRing><coordinates>3.69765,43.401778
100
+ 3.698037,43.401785
101
+ 3.697999,43.401937
102
+ 3.698251,43.401941
103
+ 3.698257,43.401817
104
+ 3.698745,43.401844
105
+ 3.698879,43.401922
106
+ 3.699678,43.401945
107
+ 3.699753,43.401867
108
+ 3.700097,43.401859
109
+ 3.700118,43.400347
110
+ 3.701695,43.400238
111
+ 3.701996,43.399685
112
+ 3.701878,43.399575
113
+ 3.701695,43.39963
114
+ 3.699024,43.397494
115
+ 3.697704,43.397619
116
+ 3.69765,43.401778</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
117
+ <Placemark id="iUben">
118
+ <name>Bureau 5</name><ExtendedData>
119
+ <Data name="_umap_options"><value>{"color":"LightSkyBlue"}</value></Data></ExtendedData>
120
+ <Polygon>
121
+ <outerBoundaryIs>
122
+ <LinearRing><coordinates>3.696084,43.406825
123
+ 3.69338,43.40656
124
+ 3.69352,43.405079
125
+ 3.694432,43.405149
126
+ 3.694142,43.40504
127
+ 3.694282,43.404073
128
+ 3.694571,43.402522
129
+ 3.694421,43.402514
130
+ 3.694271,43.402577
131
+ 3.693917,43.40246
132
+ 3.694067,43.401637
133
+ 3.694464,43.401583
134
+ 3.694689,43.40168
135
+ 3.695419,43.401727
136
+ 3.695698,43.401668
137
+ 3.695891,43.40182
138
+ 3.696631,43.401828
139
+ 3.696685,43.401672
140
+ 3.697082,43.40168
141
+ 3.697168,43.402179
142
+ 3.696524,43.405313
143
+ 3.696084,43.406825</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
144
+ <Placemark id="rI3Wb">
145
+ <name>Bureau 6</name><ExtendedData></ExtendedData>
146
+ <Polygon>
147
+ <outerBoundaryIs>
148
+ <LinearRing><coordinates>3.690253,43.404459
149
+ 3.689126,43.40426
150
+ 3.689228,43.404081
151
+ 3.688332,43.403909
152
+ 3.687989,43.404354
153
+ 3.689298,43.405059
154
+ 3.689309,43.405164
155
+ 3.689239,43.405671
156
+ 3.68866,43.405605
157
+ 3.688102,43.406045
158
+ 3.693359,43.406552
159
+ 3.693531,43.405063
160
+ 3.69441,43.405157
161
+ 3.693895,43.405055
162
+ 3.692704,43.404549
163
+ 3.692372,43.404915
164
+ 3.692297,43.404829
165
+ 3.690687,43.404712
166
+ 3.690408,43.404517
167
+ 3.690076,43.404782
168
+ 3.689851,43.404751
169
+ 3.690253,43.404459</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
170
+ <Placemark id="AaY2i">
171
+ <name>Bureau 7</name><ExtendedData>
172
+ <Data name="_umap_options"><value>{"color":"Chartreuse"}</value></Data></ExtendedData>
173
+ <Polygon>
174
+ <outerBoundaryIs>
175
+ <LinearRing><coordinates>3.695998,43.408056
176
+ 3.69514,43.409366
177
+ 3.694577,43.410207
178
+ 3.694679,43.410324
179
+ 3.694968,43.410262
180
+ 3.694936,43.41034
181
+ 3.696814,43.410659
182
+ 3.69705,43.410846
183
+ 3.69735,43.410901
184
+ 3.69765,43.410823
185
+ 3.697758,43.410496
186
+ 3.697565,43.410262
187
+ 3.697629,43.410083
188
+ 3.697876,43.410083
189
+ 3.697715,43.409864
190
+ 3.697876,43.409405
191
+ 3.698101,43.409444
192
+ 3.698133,43.409311
193
+ 3.697972,43.409272
194
+ 3.698047,43.409085
195
+ 3.698262,43.409023
196
+ 3.698326,43.408898
197
+ 3.698616,43.408945
198
+ 3.698659,43.408867
199
+ 3.698809,43.408882
200
+ 3.699163,43.408906
201
+ 3.700461,43.407573
202
+ 3.696985,43.405967
203
+ 3.695998,43.408056</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
204
+ <Placemark id="aZn3x">
205
+ <name>Bureau 8</name><ExtendedData></ExtendedData>
206
+ <Polygon>
207
+ <outerBoundaryIs>
208
+ <LinearRing><coordinates>3.685505,43.400745
209
+ 3.683478,43.402506
210
+ 3.683617,43.402978
211
+ 3.682657,43.403387
212
+ 3.682415,43.402994
213
+ 3.68226,43.403029
214
+ 3.682512,43.403434
215
+ 3.6816,43.403754
216
+ 3.67954,43.404178
217
+ 3.67924,43.403952
218
+ 3.679261,43.404323
219
+ 3.679036,43.404393
220
+ 3.67895,43.40428
221
+ 3.678735,43.404369
222
+ 3.678371,43.404159
223
+ 3.67468,43.404291
224
+ 3.674455,43.403941
225
+ 3.674412,43.404307
226
+ 3.673221,43.404299
227
+ 3.673344,43.403929
228
+ 3.67262,43.403793
229
+ 3.672566,43.403964
230
+ 3.673135,43.404065
231
+ 3.673028,43.404354
232
+ 3.670903,43.404058
233
+ 3.670158,43.40403
234
+ 3.67019,43.403652
235
+ 3.670624,43.403625
236
+ 3.67122,43.403711
237
+ 3.671177,43.402997
238
+ 3.671982,43.403052
239
+ 3.672041,43.402464
240
+ 3.670802,43.402389
241
+ 3.67085,43.402085
242
+ 3.669734,43.401992
243
+ 3.669605,43.400675
244
+ 3.669884,43.400604
245
+ 3.669949,43.400199
246
+ 3.669648,43.400168
247
+ 3.669595,43.399895
248
+ 3.669412,43.399778
249
+ 3.669434,43.398866
250
+ 3.671139,43.398889
251
+ 3.671805,43.399022
252
+ 3.672631,43.398788
253
+ 3.672684,43.399178
254
+ 3.673489,43.399076
255
+ 3.673553,43.398897
256
+ 3.674058,43.399193
257
+ 3.674326,43.3991
258
+ 3.674841,43.398593
259
+ 3.67424,43.397502
260
+ 3.67969,43.396722
261
+ 3.67939,43.395849
262
+ 3.68042,43.395849
263
+ 3.680334,43.395553
264
+ 3.680634,43.395038
265
+ 3.681214,43.395272
266
+ 3.681922,43.39507
267
+ 3.682179,43.39549
268
+ 3.684711,43.395179
269
+ 3.687222,43.394602
270
+ 3.686106,43.39507
271
+ 3.68602,43.395865
272
+ 3.686557,43.396582
273
+ 3.687694,43.396722
274
+ 3.684068,43.398858
275
+ 3.683789,43.399373
276
+ 3.683295,43.399326
277
+ 3.682973,43.398593
278
+ 3.682888,43.399232
279
+ 3.680592,43.39903
280
+ 3.680506,43.399731
281
+ 3.680849,43.400261
282
+ 3.679454,43.400932
283
+ 3.679519,43.401399
284
+ 3.682652,43.400994
285
+ 3.685355,43.400355
286
+ 3.686407,43.400542
287
+ 3.68602,43.400916
288
+ 3.685505,43.400745</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
289
+ <Placemark id="Lvrgc">
290
+ <name>Bureau 9</name><ExtendedData></ExtendedData>
291
+ <Polygon>
292
+ <outerBoundaryIs>
293
+ <LinearRing><coordinates>3.676193,43.394968
294
+ 3.676021,43.394929
295
+ 3.675334,43.395132
296
+ 3.674229,43.395452
297
+ 3.674304,43.395701
298
+ 3.6738,43.395631
299
+ 3.674004,43.395888
300
+ 3.672953,43.396738
301
+ 3.672684,43.396161
302
+ 3.671644,43.396473
303
+ 3.671494,43.395997
304
+ 3.671311,43.396091
305
+ 3.671064,43.395834
306
+ 3.670957,43.395841
307
+ 3.671536,43.396496
308
+ 3.670045,43.397034
309
+ 3.669294,43.396496
310
+ 3.669595,43.396356
311
+ 3.669455,43.396138
312
+ 3.668704,43.396262
313
+ 3.669133,43.397284
314
+ 3.668039,43.397517
315
+ 3.668189,43.397782
316
+ 3.667835,43.397814
317
+ 3.667309,43.396839
318
+ 3.667084,43.396964
319
+ 3.66688,43.396785
320
+ 3.665882,43.397268
321
+ 3.665518,43.397439
322
+ 3.665088,43.397018
323
+ 3.66408,43.397533
324
+ 3.663715,43.397065
325
+ 3.663501,43.397455
326
+ 3.66408,43.398196
327
+ 3.662975,43.398889
328
+ 3.661838,43.397829
329
+ 3.661623,43.397892
330
+ 3.661087,43.397829
331
+ 3.661344,43.397252
332
+ 3.660979,43.3968
333
+ 3.659177,43.395927
334
+ 3.659177,43.395771
335
+ 3.658276,43.395319
336
+ 3.658426,43.394882
337
+ 3.659134,43.395163
338
+ 3.660293,43.393962
339
+ 3.659456,43.393651
340
+ 3.659756,43.39337
341
+ 3.659155,43.393074
342
+ 3.659306,43.392653
343
+ 3.660915,43.393292
344
+ 3.662825,43.392559
345
+ 3.663275,43.392606
346
+ 3.664262,43.39178
347
+ 3.664606,43.392622
348
+ 3.664241,43.393011
349
+ 3.665164,43.393292
350
+ 3.666129,43.393027
351
+ 3.666472,43.392762
352
+ 3.668532,43.39284
353
+ 3.669326,43.393697
354
+ 3.670056,43.393994
355
+ 3.671853,43.393452
356
+ 3.672304,43.393204
357
+ 3.672692,43.392871
358
+ 3.672816,43.392707
359
+ 3.672907,43.392353
360
+ 3.672888,43.392261
361
+ 3.67277,43.392039
362
+ 3.672532,43.391752
363
+ 3.671815,43.391998
364
+ 3.671687,43.391889
365
+ 3.673041,43.391458
366
+ 3.673334,43.391322
367
+ 3.673449,43.391191
368
+ 3.673315,43.391054
369
+ 3.673406,43.390916
370
+ 3.673497,43.390899
371
+ 3.673462,43.39083
372
+ 3.673435,43.390758
373
+ 3.673468,43.390708
374
+ 3.673588,43.39069
375
+ 3.673567,43.390649
376
+ 3.673709,43.390614
377
+ 3.6738,43.390743
378
+ 3.673972,43.390702
379
+ 3.67409,43.39069
380
+ 3.67409,43.390836
381
+ 3.674176,43.390797
382
+ 3.674444,43.390821
383
+ 3.6745,43.390863
384
+ 3.674503,43.390978
385
+ 3.674567,43.390965
386
+ 3.674543,43.390733
387
+ 3.674626,43.390719
388
+ 3.674669,43.391678
389
+ 3.67542,43.392146
390
+ 3.674766,43.392629
391
+ 3.675013,43.392793
392
+ 3.676311,43.393619
393
+ 3.67718,43.394586
394
+ 3.676193,43.394968</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
395
+ <Placemark id="ouLu8">
396
+ <name>Bureau 10</name><ExtendedData>
397
+ <Data name="_umap_options"><value>{"color":"DodgerBlue"}</value></Data></ExtendedData>
398
+ <Polygon>
399
+ <outerBoundaryIs>
400
+ <LinearRing><coordinates>3.699142,43.406349
401
+ 3.699667,43.405687
402
+ 3.699721,43.404588
403
+ 3.698401,43.404401
404
+ 3.698766,43.404237
405
+ 3.698841,43.404237
406
+ 3.698906,43.404104
407
+ 3.699389,43.40412
408
+ 3.699346,43.403691
409
+ 3.699238,43.403652
410
+ 3.699249,43.403598
411
+ 3.699378,43.40359
412
+ 3.699378,43.403504
413
+ 3.69912,43.403465
414
+ 3.699249,43.403247
415
+ 3.699431,43.403177
416
+ 3.699528,43.403192
417
+ 3.699517,43.403372
418
+ 3.699667,43.403403
419
+ 3.699678,43.403356
420
+ 3.699979,43.403496
421
+ 3.699946,43.403637
422
+ 3.699957,43.403754
423
+ 3.699818,43.403878
424
+ 3.699946,43.403909
425
+ 3.699946,43.403987
426
+ 3.700075,43.404081
427
+ 3.700161,43.404026
428
+ 3.700966,43.404447
429
+ 3.701234,43.40451
430
+ 3.702028,43.404564
431
+ 3.702135,43.402678
432
+ 3.70235,43.402709
433
+ 3.704849,43.404619
434
+ 3.705676,43.404112
435
+ 3.702757,43.401462
436
+ 3.702307,43.400869
437
+ 3.703766,43.398468
438
+ 3.706942,43.397876
439
+ 3.708701,43.398905
440
+ 3.707199,43.401867
441
+ 3.7081,43.402304
442
+ 3.711233,43.39825
443
+ 3.714023,43.398344
444
+ 3.715096,43.398811
445
+ 3.716512,43.400776
446
+ 3.723121,43.406482
447
+ 3.725696,43.407168
448
+ 3.725395,43.413185
449
+ 3.726854,43.413808
450
+ 3.724966,43.413933
451
+ 3.725309,43.416115
452
+ 3.724751,43.418016
453
+ 3.707671,43.410441
454
+ 3.699142,43.406349</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
455
+ <Placemark id="043I7">
456
+ <name>Bureau 11</name><ExtendedData></ExtendedData>
457
+ <Polygon>
458
+ <outerBoundaryIs>
459
+ <LinearRing><coordinates>3.695483,43.401092
460
+ 3.695124,43.401068
461
+ 3.695092,43.401205
462
+ 3.694743,43.401197
463
+ 3.694764,43.401392
464
+ 3.694443,43.401555
465
+ 3.694099,43.401629
466
+ 3.692431,43.401618
467
+ 3.691556,43.402896
468
+ 3.691245,43.402787
469
+ 3.690977,43.402974
470
+ 3.690248,43.402865
471
+ 3.689969,43.403364
472
+ 3.689625,43.403255
473
+ 3.689303,43.403707
474
+ 3.688467,43.40366
475
+ 3.688338,43.403863
476
+ 3.687544,43.403551
477
+ 3.688037,43.402943
478
+ 3.687673,43.402896
479
+ 3.686063,43.403551
480
+ 3.685784,43.403925
481
+ 3.684626,43.403644
482
+ 3.685699,43.402428
483
+ 3.684497,43.403551
484
+ 3.683767,43.403738
485
+ 3.683488,43.402475
486
+ 3.685548,43.40076
487
+ 3.685956,43.400947
488
+ 3.686407,43.400526
489
+ 3.685398,43.400371
490
+ 3.684475,43.400573
491
+ 3.682737,43.400963
492
+ 3.679605,43.401337
493
+ 3.679454,43.400994
494
+ 3.680892,43.400261
495
+ 3.68057,43.3997
496
+ 3.68057,43.399045
497
+ 3.68293,43.399232
498
+ 3.682952,43.398562
499
+ 3.683274,43.399342
500
+ 3.683875,43.399373
501
+ 3.684111,43.398858
502
+ 3.686643,43.397471
503
+ 3.687651,43.396722
504
+ 3.687201,43.396644
505
+ 3.686578,43.396551
506
+ 3.685999,43.395865
507
+ 3.686106,43.395116
508
+ 3.687136,43.394695
509
+ 3.687737,43.394539
510
+ 3.687823,43.393541
511
+ 3.688311,43.393113
512
+ 3.689121,43.393323
513
+ 3.689057,43.394446
514
+ 3.689362,43.394485
515
+ 3.689867,43.394173
516
+ 3.6897,43.394064
517
+ 3.68992,43.393284
518
+ 3.690779,43.393514
519
+ 3.690918,43.393354
520
+ 3.691503,43.393666
521
+ 3.691396,43.393791
522
+ 3.692029,43.393955
523
+ 3.692586,43.394765
524
+ 3.69323,43.395467
525
+ 3.69337,43.395381
526
+ 3.694115,43.396239
527
+ 3.695049,43.396083
528
+ 3.6958,43.397689
529
+ 3.695328,43.397736
530
+ 3.695285,43.397556
531
+ 3.695033,43.397615
532
+ 3.695022,43.398461
533
+ 3.694839,43.399318
534
+ 3.695129,43.399279
535
+ 3.695354,43.399685
536
+ 3.695124,43.399829
537
+ 3.695301,43.399957
538
+ 3.695033,43.400269
539
+ 3.69543,43.400448
540
+ 3.69543,43.40076
541
+ 3.695703,43.400963
542
+ 3.695483,43.401092</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
543
+ <Placemark id="YpoWc">
544
+ <name>Bureau 12</name><ExtendedData>
545
+ <Data name="_umap_options"><value>{"color":"Orchid"}</value></Data></ExtendedData>
546
+ <Polygon>
547
+ <outerBoundaryIs>
548
+ <LinearRing><coordinates>3.698788,43.417642
549
+ 3.694496,43.415242
550
+ 3.696127,43.413964
551
+ 3.694046,43.41352
552
+ 3.69308,43.41327
553
+ 3.69279,43.412849
554
+ 3.692715,43.412164
555
+ 3.693123,43.411735
556
+ 3.694153,43.411501
557
+ 3.694679,43.410348
558
+ 3.694957,43.41027
559
+ 3.694968,43.410355
560
+ 3.696781,43.410636
561
+ 3.697039,43.410823
562
+ 3.697318,43.410917
563
+ 3.697661,43.410815
564
+ 3.697758,43.410488
565
+ 3.697565,43.410277
566
+ 3.697608,43.410106
567
+ 3.697886,43.410075
568
+ 3.697715,43.409872
569
+ 3.697876,43.409428
570
+ 3.69808,43.409451
571
+ 3.698123,43.409334
572
+ 3.697994,43.409264
573
+ 3.698037,43.409089
574
+ 3.698246,43.409027
575
+ 3.698326,43.40891
576
+ 3.698595,43.408945
577
+ 3.698654,43.408871
578
+ 3.699169,43.408906
579
+ 3.700461,43.407569
580
+ 3.723378,43.417892
581
+ 3.722091,43.4221
582
+ 3.708315,43.423004
583
+ 3.706555,43.423066
584
+ 3.706555,43.422037
585
+ 3.705482,43.42185
586
+ 3.703079,43.422006
587
+ 3.698788,43.417642</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
588
+ <Placemark id="6iuwV">
589
+ <name>Bureau 13</name><ExtendedData></ExtendedData>
590
+ <Polygon>
591
+ <outerBoundaryIs>
592
+ <LinearRing><coordinates>3.699099,43.406365
593
+ 3.699657,43.405687
594
+ 3.69971,43.404564
595
+ 3.698391,43.404401
596
+ 3.698777,43.404221
597
+ 3.698852,43.404221
598
+ 3.698895,43.404081
599
+ 3.699389,43.40412
600
+ 3.699356,43.403691
601
+ 3.699238,43.40366
602
+ 3.699249,43.403598
603
+ 3.699356,43.403598
604
+ 3.699378,43.403496
605
+ 3.69911,43.403457
606
+ 3.699238,43.403239
607
+ 3.699442,43.403169
608
+ 3.699528,43.4032
609
+ 3.699528,43.403372
610
+ 3.699678,43.403395
611
+ 3.699678,43.403325
612
+ 3.699968,43.403473
613
+ 3.699946,43.403629
614
+ 3.699989,43.403738
615
+ 3.699839,43.403863
616
+ 3.699968,43.403878
617
+ 3.699946,43.403964
618
+ 3.700086,43.404065
619
+ 3.700172,43.404011
620
+ 3.700279,43.404065
621
+ 3.700998,43.40444
622
+ 3.701245,43.404502
623
+ 3.701341,43.402608
624
+ 3.700118,43.402538
625
+ 3.700129,43.401875
626
+ 3.699764,43.401883
627
+ 3.699678,43.401961
628
+ 3.698895,43.401945
629
+ 3.698756,43.401859
630
+ 3.698294,43.401836
631
+ 3.69823,43.401953
632
+ 3.698015,43.401937
633
+ 3.698026,43.40182
634
+ 3.697715,43.401797
635
+ 3.697017,43.405414
636
+ 3.699099,43.406365</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
637
+ <Placemark id="MY3Lr">
638
+ <name>Bureau 14</name><ExtendedData></ExtendedData>
639
+ <Polygon>
640
+ <outerBoundaryIs>
641
+ <LinearRing><coordinates>3.676322,43.393604
642
+ 3.674841,43.392653
643
+ 3.675442,43.392169
644
+ 3.674734,43.391639
645
+ 3.674669,43.390797
646
+ 3.675592,43.390782
647
+ 3.677952,43.391156
648
+ 3.677952,43.391795
649
+ 3.68012,43.392793
650
+ 3.681772,43.393214
651
+ 3.682652,43.393386
652
+ 3.683553,43.393339
653
+ 3.684046,43.392809
654
+ 3.684347,43.392193
655
+ 3.684411,43.39238
656
+ 3.685001,43.392372
657
+ 3.685591,43.392473
658
+ 3.685763,43.392325
659
+ 3.686117,43.392216
660
+ 3.686718,43.392341
661
+ 3.688048,43.392317
662
+ 3.688509,43.392169
663
+ 3.688499,43.392271
664
+ 3.690162,43.392224
665
+ 3.690097,43.392076
666
+ 3.691063,43.392045
667
+ 3.6919,43.392068
668
+ 3.691953,43.392403
669
+ 3.692222,43.392505
670
+ 3.692082,43.392575
671
+ 3.692254,43.393019
672
+ 3.692619,43.393269
673
+ 3.692898,43.393245
674
+ 3.692887,43.393362
675
+ 3.693606,43.393861
676
+ 3.693713,43.394165
677
+ 3.694046,43.394196
678
+ 3.694153,43.394344
679
+ 3.694013,43.394555
680
+ 3.694668,43.394695
681
+ 3.695322,43.394914
682
+ 3.692908,43.394851
683
+ 3.693367,43.395387
684
+ 3.693222,43.395457
685
+ 3.692594,43.394785
686
+ 3.692436,43.394526
687
+ 3.692055,43.393945
688
+ 3.691353,43.393791
689
+ 3.691535,43.393682
690
+ 3.690902,43.393347
691
+ 3.690784,43.393503
692
+ 3.689915,43.393276
693
+ 3.6897,43.394064
694
+ 3.689861,43.394165
695
+ 3.689346,43.394477
696
+ 3.689046,43.394446
697
+ 3.689132,43.393323
698
+ 3.688306,43.393105
699
+ 3.687769,43.393565
700
+ 3.687737,43.394485
701
+ 3.685226,43.395046
702
+ 3.682158,43.395522
703
+ 3.681997,43.395085
704
+ 3.681235,43.39528
705
+ 3.680506,43.395093
706
+ 3.680334,43.395506
707
+ 3.680559,43.395724
708
+ 3.68042,43.39588
709
+ 3.67969,43.395763
710
+ 3.679562,43.395607
711
+ 3.679358,43.395678
712
+ 3.679723,43.396605
713
+ 3.679519,43.396847
714
+ 3.677899,43.396941
715
+ 3.677019,43.397284
716
+ 3.676622,43.396379
717
+ 3.675978,43.396535
718
+ 3.675785,43.396161
719
+ 3.676043,43.396005
720
+ 3.675785,43.395709
721
+ 3.676064,43.395506
722
+ 3.675742,43.395023
723
+ 3.676005,43.394933
724
+ 3.676236,43.39496
725
+ 3.677201,43.394598
726
+ 3.676322,43.393604</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
727
+ <Placemark id="DZ7yf">
728
+ <name>Bureau 15</name><ExtendedData></ExtendedData>
729
+ <Polygon>
730
+ <outerBoundaryIs>
731
+ <LinearRing><coordinates>3.668125,43.401805
732
+ 3.667738,43.401384
733
+ 3.667331,43.401322
734
+ 3.667052,43.401072
735
+ 3.666987,43.400495
736
+ 3.666344,43.400417
737
+ 3.665721,43.400137
738
+ 3.664992,43.400308
739
+ 3.664412,43.3997
740
+ 3.664927,43.39931
741
+ 3.665614,43.39903
742
+ 3.667674,43.398656
743
+ 3.667159,43.398281
744
+ 3.666108,43.398656
745
+ 3.665915,43.398422
746
+ 3.665507,43.398593
747
+ 3.665539,43.398827
748
+ 3.6654,43.398663
749
+ 3.665045,43.398718
750
+ 3.665067,43.398858
751
+ 3.664541,43.399084
752
+ 3.66438,43.399014
753
+ 3.663576,43.399373
754
+ 3.662996,43.398921
755
+ 3.664058,43.398196
756
+ 3.663522,43.397494
757
+ 3.663661,43.397081
758
+ 3.664091,43.397502
759
+ 3.665164,43.397018
760
+ 3.66555,43.397439
761
+ 3.666859,43.3968
762
+ 3.667095,43.396956
763
+ 3.667266,43.396878
764
+ 3.667824,43.397829
765
+ 3.668168,43.397767
766
+ 3.668039,43.39751
767
+ 3.669133,43.397291
768
+ 3.668693,43.39627
769
+ 3.669452,43.396143
770
+ 3.669586,43.39635
771
+ 3.669291,43.396496
772
+ 3.670034,43.397034
773
+ 3.671542,43.396496
774
+ 3.670962,43.395834
775
+ 3.671097,43.395849
776
+ 3.671311,43.396091
777
+ 3.671515,43.395989
778
+ 3.671644,43.396473
779
+ 3.672695,43.396165
780
+ 3.672953,43.396722
781
+ 3.673983,43.39588
782
+ 3.6738,43.395631
783
+ 3.674326,43.39567
784
+ 3.674262,43.395436
785
+ 3.675807,43.395007
786
+ 3.676085,43.395584
787
+ 3.675731,43.395709
788
+ 3.675989,43.396075
789
+ 3.675753,43.396153
790
+ 3.675989,43.396535
791
+ 3.676579,43.396325
792
+ 3.676922,43.397057
793
+ 3.675731,43.397416
794
+ 3.674326,43.397455
795
+ 3.674819,43.398562
796
+ 3.674304,43.399092
797
+ 3.674058,43.399162
798
+ 3.673586,43.398882
799
+ 3.673478,43.399076
800
+ 3.672706,43.399186
801
+ 3.672652,43.398772
802
+ 3.672051,43.398936
803
+ 3.671751,43.399014
804
+ 3.671182,43.398913
805
+ 3.669423,43.398866
806
+ 3.669391,43.399357
807
+ 3.669423,43.399794
808
+ 3.669584,43.399918
809
+ 3.669637,43.400176
810
+ 3.669895,43.400183
811
+ 3.669863,43.400581
812
+ 3.669605,43.400659
813
+ 3.66967,43.400979
814
+ 3.669723,43.401953
815
+ 3.670871,43.402046
816
+ 3.670785,43.402389
817
+ 3.67203,43.402467
818
+ 3.671987,43.403036
819
+ 3.671172,43.40299
820
+ 3.671225,43.403715
821
+ 3.670571,43.403613
822
+ 3.668125,43.401805</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
823
+ <Placemark id="xGVYD">
824
+ <name>Bureau 16</name><ExtendedData>
825
+ <Data name="_umap_options"><value>{"color":"Chartreuse"}</value></Data></ExtendedData>
826
+ <Polygon>
827
+ <outerBoundaryIs>
828
+ <LinearRing><coordinates>3.693091,43.406544
829
+ 3.688059,43.406037
830
+ 3.688617,43.405609
831
+ 3.689175,43.405655
832
+ 3.68926,43.405063
833
+ 3.687952,43.404362
834
+ 3.688316,43.403878
835
+ 3.687458,43.403535
836
+ 3.686299,43.403785
837
+ 3.686128,43.404128
838
+ 3.686299,43.404362
839
+ 3.685634,43.404845
840
+ 3.685913,43.405297
841
+ 3.685441,43.405422
842
+ 3.685098,43.405843
843
+ 3.685784,43.406435
844
+ 3.68778,43.406201
845
+ 3.688016,43.406326
846
+ 3.687705,43.406451
847
+ 3.687812,43.40677
848
+ 3.688606,43.406864
849
+ 3.688692,43.406778
850
+ 3.689089,43.406825
851
+ 3.689142,43.406716
852
+ 3.689325,43.406856
853
+ 3.689314,43.406957
854
+ 3.689893,43.406988
855
+ 3.689904,43.406926
856
+ 3.690033,43.406949
857
+ 3.690076,43.406786
858
+ 3.691031,43.406856
859
+ 3.691245,43.407027
860
+ 3.691245,43.407136
861
+ 3.692694,43.407285
862
+ 3.693112,43.407456
863
+ 3.693144,43.407565
864
+ 3.693262,43.407573
865
+ 3.693466,43.408033
866
+ 3.69426,43.408742
867
+ 3.694646,43.40843
868
+ 3.694904,43.408664
869
+ 3.696041,43.406887
870
+ 3.693091,43.406544</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
871
+ <Placemark id="gFNtY">
872
+ <name>Bureau 17</name><ExtendedData></ExtendedData>
873
+ <Polygon>
874
+ <outerBoundaryIs>
875
+ <LinearRing><coordinates>3.691058,43.407949
876
+ 3.691127,43.407534
877
+ 3.691015,43.407394
878
+ 3.691224,43.40716
879
+ 3.692683,43.407292
880
+ 3.693112,43.407479
881
+ 3.693112,43.407573
882
+ 3.693241,43.407588
883
+ 3.693434,43.408033
884
+ 3.69426,43.408742
885
+ 3.694646,43.408446
886
+ 3.694893,43.408649
887
+ 3.69441,43.409475
888
+ 3.693627,43.410371
889
+ 3.692554,43.409467
890
+ 3.69175,43.409864
891
+ 3.69117,43.409381
892
+ 3.691084,43.409303
893
+ 3.690988,43.409233
894
+ 3.690923,43.409186
895
+ 3.690966,43.408746
896
+ 3.690816,43.408461
897
+ 3.69091,43.408017
898
+ 3.691058,43.407949</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
899
+ <Placemark id="8BP79">
900
+ <name>Bureau 18</name><ExtendedData></ExtendedData>
901
+ <Polygon>
902
+ <outerBoundaryIs>
903
+ <LinearRing><coordinates>3.670152,43.404026
904
+ 3.670893,43.404089
905
+ 3.670882,43.404245
906
+ 3.671955,43.404323
907
+ 3.671805,43.404954
908
+ 3.672878,43.405281
909
+ 3.672727,43.405624
910
+ 3.672566,43.405882
911
+ 3.672116,43.405819
912
+ 3.671708,43.406661
913
+ 3.670732,43.408095
914
+ 3.66864,43.407386
915
+ 3.668082,43.408235
916
+ 3.667567,43.408539
917
+ 3.667073,43.408204
918
+ 3.665936,43.408906
919
+ 3.665636,43.408641
920
+ 3.663962,43.409693
921
+ 3.66246,43.407807
922
+ 3.664391,43.406677
923
+ 3.664262,43.406037
924
+ 3.664445,43.405928
925
+ 3.66496,43.406248
926
+ 3.665142,43.405967
927
+ 3.664638,43.405648
928
+ 3.664353,43.405894
929
+ 3.664261,43.405819
930
+ 3.664553,43.405594
931
+ 3.664362,43.405496
932
+ 3.664099,43.4057
933
+ 3.663975,43.405595
934
+ 3.664235,43.405398
935
+ 3.663028,43.404151
936
+ 3.66276,43.404026
937
+ 3.662513,43.403933
938
+ 3.662417,43.403629
939
+ 3.663093,43.403528
940
+ 3.663104,43.403177
941
+ 3.663865,43.40313
942
+ 3.663983,43.403294
943
+ 3.667288,43.403489
944
+ 3.670174,43.403668
945
+ 3.670152,43.404026</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
946
+ <Placemark id="jva7z">
947
+ <name>Bureau 19</name><ExtendedData></ExtendedData>
948
+ <Polygon>
949
+ <outerBoundaryIs>
950
+ <LinearRing><coordinates>3.670721,43.408126
951
+ 3.67173,43.406638
952
+ 3.672116,43.405796
953
+ 3.672577,43.405897
954
+ 3.673317,43.406279
955
+ 3.673156,43.406677
956
+ 3.675624,43.407331
957
+ 3.675742,43.406934
958
+ 3.676043,43.406903
959
+ 3.676,43.406606
960
+ 3.676375,43.406536
961
+ 3.676375,43.406201
962
+ 3.677684,43.40663
963
+ 3.678285,43.406606
964
+ 3.67851,43.406778
965
+ 3.678124,43.407479
966
+ 3.676622,43.408493
967
+ 3.676257,43.409974
968
+ 3.670721,43.408126</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
969
+ <Placemark id="UJ4zH">
970
+ <name>Bureau 20</name><ExtendedData></ExtendedData>
971
+ <Polygon>
972
+ <outerBoundaryIs>
973
+ <LinearRing><coordinates>3.690891,43.409194
974
+ 3.690956,43.409194
975
+ 3.691771,43.409872
976
+ 3.692554,43.409459
977
+ 3.693627,43.410387
978
+ 3.69293,43.410698
979
+ 3.692265,43.411454
980
+ 3.69175,43.411376
981
+ 3.691063,43.41147
982
+ 3.690677,43.411408
983
+ 3.69029,43.411454
984
+ 3.690012,43.411415
985
+ 3.689904,43.411579
986
+ 3.689507,43.411618
987
+ 3.689443,43.411486
988
+ 3.68926,43.411571
989
+ 3.6891,43.4114
990
+ 3.689228,43.411314
991
+ 3.689239,43.411135
992
+ 3.688992,43.41108
993
+ 3.688756,43.41087
994
+ 3.688853,43.410831
995
+ 3.688767,43.41062
996
+ 3.688434,43.410605
997
+ 3.688091,43.410246
998
+ 3.688252,43.41002
999
+ 3.689089,43.40956
1000
+ 3.689057,43.409272
1001
+ 3.690548,43.408461
1002
+ 3.69072,43.408563
1003
+ 3.690891,43.409194</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1004
+ <Placemark id="r7MPB">
1005
+ <name>Bureau 21</name><ExtendedData></ExtendedData>
1006
+ <Polygon>
1007
+ <outerBoundaryIs>
1008
+ <LinearRing><coordinates>3.688402,43.410605
1009
+ 3.688767,43.410613
1010
+ 3.688874,43.410823
1011
+ 3.688745,43.41087
1012
+ 3.688982,43.411072
1013
+ 3.68926,43.411111
1014
+ 3.68925,43.411306
1015
+ 3.6891,43.4114
1016
+ 3.689239,43.411563
1017
+ 3.68911,43.411673
1018
+ 3.687887,43.411852
1019
+ 3.687973,43.411665
1020
+ 3.687737,43.411447
1021
+ 3.687544,43.411415
1022
+ 3.687222,43.411026
1023
+ 3.687576,43.410815
1024
+ 3.687383,43.410652
1025
+ 3.687179,43.410706
1026
+ 3.686568,43.410067
1027
+ 3.687469,43.409646
1028
+ 3.687136,43.409303
1029
+ 3.686804,43.409436
1030
+ 3.685892,43.408384
1031
+ 3.685033,43.408797
1032
+ 3.684744,43.408532
1033
+ 3.684937,43.408313
1034
+ 3.685838,43.407892
1035
+ 3.685634,43.407651
1036
+ 3.686514,43.407246
1037
+ 3.686353,43.406996
1038
+ 3.686535,43.406957
1039
+ 3.68646,43.406716
1040
+ 3.686954,43.406591
1041
+ 3.686879,43.40638
1042
+ 3.687758,43.406193
1043
+ 3.688011,43.406337
1044
+ 3.687721,43.406439
1045
+ 3.687807,43.406786
1046
+ 3.688622,43.406864
1047
+ 3.688697,43.406782
1048
+ 3.689121,43.406825
1049
+ 3.689164,43.40672
1050
+ 3.68933,43.40686
1051
+ 3.689314,43.406965
1052
+ 3.689904,43.407004
1053
+ 3.689931,43.40695
1054
+ 3.690049,43.406954
1055
+ 3.690092,43.406802
1056
+ 3.691031,43.406903
1057
+ 3.691246,43.407137
1058
+ 3.69102,43.407414
1059
+ 3.691128,43.407554
1060
+ 3.691074,43.407979
1061
+ 3.690908,43.408037
1062
+ 3.690827,43.408474
1063
+ 3.690988,43.408758
1064
+ 3.690924,43.40921
1065
+ 3.69058,43.408509
1066
+ 3.689046,43.409296
1067
+ 3.6891,43.409576
1068
+ 3.68822,43.41002
1069
+ 3.688091,43.410254
1070
+ 3.688402,43.410605</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1071
+ <Placemark id="8NzmY">
1072
+ <name>Bureau 22</name><ExtendedData></ExtendedData>
1073
+ <Polygon>
1074
+ <outerBoundaryIs>
1075
+ <LinearRing><coordinates>3.685741,43.406965
1076
+ 3.681879,43.407744
1077
+ 3.681686,43.407542
1078
+ 3.681257,43.40762
1079
+ 3.681107,43.407433
1080
+ 3.680592,43.407464
1081
+ 3.680613,43.407183
1082
+ 3.679819,43.407183
1083
+ 3.679132,43.406809
1084
+ 3.678596,43.406762
1085
+ 3.678339,43.406622
1086
+ 3.677609,43.406638
1087
+ 3.676364,43.406217
1088
+ 3.676322,43.406497
1089
+ 3.675978,43.406575
1090
+ 3.676,43.406887
1091
+ 3.675742,43.406903
1092
+ 3.67557,43.407292
1093
+ 3.67321,43.4067
1094
+ 3.67336,43.406295
1095
+ 3.672631,43.405874
1096
+ 3.672867,43.405281
1097
+ 3.671901,43.40497
1098
+ 3.671944,43.404362
1099
+ 3.670914,43.404268
1100
+ 3.670893,43.404065
1101
+ 3.67306,43.404362
1102
+ 3.673146,43.40405
1103
+ 3.672566,43.403987
1104
+ 3.672631,43.403785
1105
+ 3.673339,43.403925
1106
+ 3.673189,43.404315
1107
+ 3.674433,43.40433
1108
+ 3.674476,43.403987
1109
+ 3.674669,43.404315
1110
+ 3.67836,43.404143
1111
+ 3.678725,43.404377
1112
+ 3.678972,43.404291
1113
+ 3.679036,43.404393
1114
+ 3.679283,43.404323
1115
+ 3.67925,43.403956
1116
+ 3.67954,43.40419
1117
+ 3.680667,43.403941
1118
+ 3.681611,43.403754
1119
+ 3.682029,43.403582
1120
+ 3.682523,43.403426
1121
+ 3.682265,43.403036
1122
+ 3.682415,43.40299
1123
+ 3.682662,43.403379
1124
+ 3.683628,43.402982
1125
+ 3.683488,43.402514
1126
+ 3.683703,43.402982
1127
+ 3.683767,43.40306
1128
+ 3.683639,43.403699
1129
+ 3.684475,43.403574
1130
+ 3.685623,43.40253
1131
+ 3.685656,43.402584
1132
+ 3.684551,43.403598
1133
+ 3.685795,43.404026
1134
+ 3.686053,43.403582
1135
+ 3.686997,43.403208
1136
+ 3.687683,43.402951
1137
+ 3.688059,43.40299
1138
+ 3.687555,43.40352
1139
+ 3.686321,43.403754
1140
+ 3.686106,43.404112
1141
+ 3.686192,43.404346
1142
+ 3.685613,43.40486
1143
+ 3.685859,43.405258
1144
+ 3.685377,43.405375
1145
+ 3.685119,43.405889
1146
+ 3.685741,43.406497
1147
+ 3.686557,43.406404
1148
+ 3.685741,43.406965</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1149
+ <Placemark id="7iQ5D">
1150
+ <name>Bureau 23</name><ExtendedData></ExtendedData>
1151
+ <Polygon>
1152
+ <outerBoundaryIs>
1153
+ <LinearRing><coordinates>3.682984,43.413442
1154
+ 3.680667,43.413987
1155
+ 3.679122,43.41426
1156
+ 3.677469,43.414377
1157
+ 3.676718,43.413831
1158
+ 3.67704,43.413442
1159
+ 3.67733,43.413582
1160
+ 3.677588,43.413247
1161
+ 3.677341,43.413099
1162
+ 3.677802,43.412514
1163
+ 3.677974,43.4126
1164
+ 3.678832,43.411992
1165
+ 3.679068,43.412109
1166
+ 3.67924,43.411587
1167
+ 3.678886,43.411462
1168
+ 3.679014,43.411298
1169
+ 3.67887,43.411248
1170
+ 3.679004,43.411104
1171
+ 3.679808,43.411088
1172
+ 3.681117,43.411587
1173
+ 3.681332,43.411369
1174
+ 3.681332,43.411033
1175
+ 3.681686,43.411174
1176
+ 3.681718,43.411088
1177
+ 3.681514,43.41101
1178
+ 3.681718,43.410706
1179
+ 3.681096,43.410457
1180
+ 3.681332,43.410028
1181
+ 3.682834,43.410036
1182
+ 3.683102,43.410231
1183
+ 3.683295,43.410332
1184
+ 3.683982,43.410129
1185
+ 3.683692,43.40988
1186
+ 3.6841,43.409615
1187
+ 3.684025,43.409576
1188
+ 3.68439,43.409334
1189
+ 3.685452,43.410449
1190
+ 3.685269,43.41055
1191
+ 3.685473,43.410792
1192
+ 3.684443,43.411275
1193
+ 3.684529,43.411727
1194
+ 3.684787,43.411899
1195
+ 3.684078,43.412148
1196
+ 3.684422,43.413013
1197
+ 3.682984,43.413442</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1198
+ <Placemark id="oQc1Y">
1199
+ <name>Bureau 24</name><ExtendedData>
1200
+ <Data name="_umap_options"><value>{"color":"Chartreuse"}</value></Data></ExtendedData>
1201
+ <Polygon>
1202
+ <outerBoundaryIs>
1203
+ <LinearRing><coordinates>3.664713,43.417081
1204
+ 3.657546,43.412093
1205
+ 3.659241,43.410784
1206
+ 3.661923,43.410161
1207
+ 3.662347,43.410523
1208
+ 3.662771,43.411505
1209
+ 3.661956,43.412027
1210
+ 3.661301,43.411614
1211
+ 3.660829,43.411969
1212
+ 3.662642,43.413208
1213
+ 3.662642,43.413489
1214
+ 3.662975,43.413738
1215
+ 3.663232,43.413668
1216
+ 3.66364,43.413917
1217
+ 3.663608,43.414151
1218
+ 3.663844,43.414291
1219
+ 3.664273,43.414369
1220
+ 3.66555,43.415289
1221
+ 3.665679,43.415164
1222
+ 3.665979,43.415343
1223
+ 3.665807,43.415484
1224
+ 3.666322,43.415834
1225
+ 3.664713,43.417081</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1226
+ <Placemark id="yDXRa">
1227
+ <name>Bureau 25</name><ExtendedData></ExtendedData>
1228
+ <Polygon>
1229
+ <outerBoundaryIs>
1230
+ <LinearRing><coordinates>3.66982,43.419637
1231
+ 3.671257,43.420557
1232
+ 3.674004,43.423518
1233
+ 3.674026,43.423923
1234
+ 3.675549,43.424422
1235
+ 3.6763,43.423658
1236
+ 3.677244,43.423658
1237
+ 3.677888,43.421913
1238
+ 3.677094,43.420962
1239
+ 3.673983,43.418842
1240
+ 3.673768,43.417175
1241
+ 3.673961,43.416255
1242
+ 3.674583,43.415647
1243
+ 3.675302,43.415141
1244
+ 3.676461,43.414696
1245
+ 3.6791,43.41447
1246
+ 3.681911,43.413925
1247
+ 3.682501,43.413956
1248
+ 3.682823,43.414057
1249
+ 3.683242,43.413379
1250
+ 3.680677,43.413995
1251
+ 3.679068,43.414276
1252
+ 3.677502,43.414361
1253
+ 3.676611,43.41447
1254
+ 3.675356,43.414455
1255
+ 3.674852,43.414494
1256
+ 3.67483,43.414572
1257
+ 3.674465,43.414556
1258
+ 3.674423,43.414665
1259
+ 3.67395,43.414743
1260
+ 3.673382,43.415468
1261
+ 3.673736,43.415624
1262
+ 3.673521,43.41592
1263
+ 3.673221,43.415756
1264
+ 3.673081,43.41592
1265
+ 3.672695,43.415717
1266
+ 3.672266,43.416232
1267
+ 3.67292,43.416598
1268
+ 3.672792,43.416801
1269
+ 3.672588,43.416692
1270
+ 3.671719,43.417728
1271
+ 3.671955,43.417853
1272
+ 3.671268,43.418648
1273
+ 3.670903,43.418492
1274
+ 3.67071,43.41871
1275
+ 3.669788,43.418157
1276
+ 3.669938,43.418001
1277
+ 3.669809,43.417907
1278
+ 3.669423,43.418281
1279
+ 3.669863,43.418601
1280
+ 3.670163,43.418796
1281
+ 3.670067,43.418905
1282
+ 3.670238,43.418967
1283
+ 3.670152,43.419076
1284
+ 3.669959,43.418991
1285
+ 3.669884,43.419084
1286
+ 3.66968,43.418975
1287
+ 3.66938,43.4191
1288
+ 3.669262,43.419045
1289
+ 3.669273,43.418702
1290
+ 3.668908,43.418788
1291
+ 3.668575,43.419232
1292
+ 3.668897,43.419559
1293
+ 3.66982,43.419637</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1294
+ <Placemark id="roqV8">
1295
+ <name>Bureau 26</name><ExtendedData>
1296
+ <Data name="_umap_options"><value>{"color":"Crimson"}</value></Data></ExtendedData>
1297
+ <Polygon>
1298
+ <outerBoundaryIs>
1299
+ <LinearRing><coordinates>3.662868,43.410254
1300
+ 3.665528,43.412171
1301
+ 3.665496,43.412771
1302
+ 3.667234,43.414057
1303
+ 3.667331,43.415647
1304
+ 3.666998,43.415749
1305
+ 3.666215,43.415187
1306
+ 3.666033,43.415297
1307
+ 3.665775,43.415071
1308
+ 3.666022,43.414891
1309
+ 3.665282,43.414385
1310
+ 3.665013,43.414548
1311
+ 3.664702,43.414322
1312
+ 3.664906,43.414167
1313
+ 3.664713,43.413987
1314
+ 3.664262,43.414346
1315
+ 3.663844,43.414307
1316
+ 3.663597,43.414167
1317
+ 3.663629,43.413902
1318
+ 3.663222,43.413652
1319
+ 3.662964,43.41373
1320
+ 3.662642,43.413504
1321
+ 3.662642,43.413224
1322
+ 3.663189,43.412865
1323
+ 3.662503,43.412382
1324
+ 3.662213,43.412577
1325
+ 3.66172,43.412203
1326
+ 3.662792,43.41147
1327
+ 3.662353,43.410496
1328
+ 3.662868,43.410254</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1329
+ <Placemark id="6YOqE">
1330
+ <name>Bureau 27</name><ExtendedData></ExtendedData>
1331
+ <Polygon>
1332
+ <outerBoundaryIs>
1333
+ <LinearRing><coordinates>3.674819,43.414556
1334
+ 3.674476,43.414548
1335
+ 3.67439,43.414681
1336
+ 3.673961,43.414743
1337
+ 3.67336,43.415468
1338
+ 3.673725,43.415639
1339
+ 3.673511,43.415889
1340
+ 3.673221,43.415764
1341
+ 3.673092,43.415904
1342
+ 3.672663,43.415733
1343
+ 3.672255,43.416208
1344
+ 3.67292,43.416614
1345
+ 3.672781,43.416801
1346
+ 3.672609,43.416707
1347
+ 3.67173,43.417728
1348
+ 3.671933,43.417861
1349
+ 3.671247,43.418632
1350
+ 3.670914,43.418492
1351
+ 3.6707,43.418694
1352
+ 3.669755,43.418164
1353
+ 3.669949,43.418009
1354
+ 3.66982,43.417923
1355
+ 3.669401,43.418274
1356
+ 3.670152,43.418788
1357
+ 3.670056,43.418913
1358
+ 3.670228,43.418944
1359
+ 3.670131,43.419053
1360
+ 3.669873,43.419076
1361
+ 3.669659,43.418967
1362
+ 3.669348,43.419107
1363
+ 3.669262,43.419045
1364
+ 3.669219,43.41871
1365
+ 3.668865,43.418803
1366
+ 3.668565,43.419193
1367
+ 3.667953,43.418788
1368
+ 3.667803,43.41878
1369
+ 3.667427,43.419022
1370
+ 3.666569,43.418546
1371
+ 3.666676,43.418507
1372
+ 3.667438,43.41892
1373
+ 3.667749,43.418702
1374
+ 3.666655,43.418016
1375
+ 3.666462,43.41818
1376
+ 3.666258,43.418445
1377
+ 3.666097,43.418991
1378
+ 3.66599,43.418959
1379
+ 3.666011,43.418616
1380
+ 3.666451,43.417884
1381
+ 3.666054,43.417198
1382
+ 3.665839,43.417214
1383
+ 3.665679,43.417011
1384
+ 3.665968,43.416863
1385
+ 3.665979,43.41652
1386
+ 3.666869,43.415889
1387
+ 3.667556,43.415562
1388
+ 3.667449,43.414112
1389
+ 3.666043,43.412725
1390
+ 3.665646,43.411977
1391
+ 3.664573,43.411166
1392
+ 3.663576,43.410114
1393
+ 3.663136,43.40967
1394
+ 3.662653,43.409553
1395
+ 3.661956,43.410067
1396
+ 3.661784,43.410059
1397
+ 3.662417,43.409459
1398
+ 3.663211,43.408906
1399
+ 3.663919,43.409747
1400
+ 3.665314,43.408851
1401
+ 3.665861,43.40928
1402
+ 3.665872,43.409529
1403
+ 3.666215,43.40981
1404
+ 3.666569,43.409825
1405
+ 3.667063,43.410184
1406
+ 3.667213,43.410371
1407
+ 3.666773,43.410667
1408
+ 3.666869,43.411041
1409
+ 3.667234,43.411096
1410
+ 3.667449,43.410979
1411
+ 3.667631,43.411096
1412
+ 3.667846,43.410956
1413
+ 3.668232,43.41126
1414
+ 3.668586,43.411143
1415
+ 3.669155,43.411797
1416
+ 3.668371,43.412319
1417
+ 3.668844,43.412538
1418
+ 3.670346,43.411462
1419
+ 3.670689,43.411696
1420
+ 3.671504,43.411096
1421
+ 3.672287,43.411938
1422
+ 3.671676,43.412351
1423
+ 3.67203,43.4126
1424
+ 3.671547,43.41292
1425
+ 3.672287,43.413551
1426
+ 3.671547,43.414034
1427
+ 3.673725,43.4144
1428
+ 3.674873,43.414502
1429
+ 3.674819,43.414556</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1430
+ <Placemark id="7aZin">
1431
+ <name>Bureau 28</name><ExtendedData></ExtendedData>
1432
+ <Polygon>
1433
+ <outerBoundaryIs>
1434
+ <LinearRing><coordinates>3.686471,43.406723
1435
+ 3.686535,43.406942
1436
+ 3.686374,43.407004
1437
+ 3.686503,43.407238
1438
+ 3.685634,43.407659
1439
+ 3.685817,43.407877
1440
+ 3.684947,43.408298
1441
+ 3.684379,43.408937
1442
+ 3.684658,43.408914
1443
+ 3.684754,43.409069
1444
+ 3.684626,43.409171
1445
+ 3.684036,43.409576
1446
+ 3.6841,43.409615
1447
+ 3.683692,43.409888
1448
+ 3.683968,43.410129
1449
+ 3.683298,43.410332
1450
+ 3.683099,43.410229
1451
+ 3.682845,43.410028
1452
+ 3.6813,43.41002
1453
+ 3.681107,43.410449
1454
+ 3.681707,43.410722
1455
+ 3.681525,43.411014
1456
+ 3.681718,43.411088
1457
+ 3.681702,43.41117
1458
+ 3.681327,43.411037
1459
+ 3.681321,43.411388
1460
+ 3.681123,43.411595
1461
+ 3.679808,43.411082
1462
+ 3.678998,43.411107
1463
+ 3.678886,43.4108
1464
+ 3.678435,43.410648
1465
+ 3.678373,43.410515
1466
+ 3.678854,43.40852
1467
+ 3.679132,43.407791
1468
+ 3.678145,43.407526
1469
+ 3.678532,43.406778
1470
+ 3.679111,43.40684
1471
+ 3.679755,43.407168
1472
+ 3.680592,43.407183
1473
+ 3.680592,43.407479
1474
+ 3.681085,43.407433
1475
+ 3.681171,43.407666
1476
+ 3.681686,43.407573
1477
+ 3.681772,43.407776
1478
+ 3.68572,43.406949
1479
+ 3.686637,43.40638
1480
+ 3.686868,43.406334
1481
+ 3.686932,43.40656
1482
+ 3.686471,43.406723</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1483
+ <Placemark id="sJOtO">
1484
+ <name>Bureau 29</name><ExtendedData></ExtendedData>
1485
+ <Polygon>
1486
+ <outerBoundaryIs>
1487
+ <LinearRing><coordinates>3.663919,43.40327
1488
+ 3.663822,43.403122
1489
+ 3.663125,43.403177
1490
+ 3.663114,43.403528
1491
+ 3.662417,43.403621
1492
+ 3.662524,43.403933
1493
+ 3.66305,43.404151
1494
+ 3.664252,43.405406
1495
+ 3.663973,43.405594
1496
+ 3.664099,43.405701
1497
+ 3.664364,43.405497
1498
+ 3.664551,43.405593
1499
+ 3.664262,43.405821
1500
+ 3.664352,43.405895
1501
+ 3.664627,43.405648
1502
+ 3.665131,43.405975
1503
+ 3.664927,43.406232
1504
+ 3.664445,43.405927
1505
+ 3.664262,43.406035
1506
+ 3.664327,43.4067
1507
+ 3.66246,43.407807
1508
+ 3.663232,43.408898
1509
+ 3.661752,43.409989
1510
+ 3.656774,43.402787
1511
+ 3.658136,43.401399
1512
+ 3.658544,43.401173
1513
+ 3.659145,43.40048
1514
+ 3.659037,43.39995
1515
+ 3.659649,43.398819
1516
+ 3.660765,43.399092
1517
+ 3.662041,43.398009
1518
+ 3.662835,43.398866
1519
+ 3.663565,43.399396
1520
+ 3.664391,43.398999
1521
+ 3.664541,43.399092
1522
+ 3.665067,43.398858
1523
+ 3.665024,43.398718
1524
+ 3.66541,43.39864
1525
+ 3.66555,43.398819
1526
+ 3.665496,43.398601
1527
+ 3.665915,43.398437
1528
+ 3.666086,43.398663
1529
+ 3.667202,43.398281
1530
+ 3.667728,43.398663
1531
+ 3.665593,43.39903
1532
+ 3.664863,43.399326
1533
+ 3.66438,43.399669
1534
+ 3.66497,43.400332
1535
+ 3.665646,43.400137
1536
+ 3.666354,43.400425
1537
+ 3.666934,43.400441
1538
+ 3.666998,43.401033
1539
+ 3.667363,43.401368
1540
+ 3.667728,43.401407
1541
+ 3.668028,43.401758
1542
+ 3.669251,43.402725
1543
+ 3.670592,43.403629
1544
+ 3.669852,43.40366
1545
+ 3.667331,43.403489
1546
+ 3.663919,43.40327</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1547
+ <Placemark id="4Xq6M">
1548
+ <name>Bureau 30</name><ExtendedData></ExtendedData>
1549
+ <Polygon>
1550
+ <outerBoundaryIs>
1551
+ <LinearRing><coordinates>3.684443,43.413021
1552
+ 3.684068,43.41214
1553
+ 3.684829,43.411899
1554
+ 3.68454,43.411735
1555
+ 3.684454,43.411275
1556
+ 3.685484,43.410792
1557
+ 3.685259,43.41055
1558
+ 3.685462,43.410449
1559
+ 3.684422,43.409342
1560
+ 3.684744,43.409062
1561
+ 3.684669,43.408906
1562
+ 3.684497,43.409038
1563
+ 3.68439,43.408937
1564
+ 3.684733,43.4085
1565
+ 3.685033,43.408804
1566
+ 3.685881,43.408376
1567
+ 3.686535,43.409155
1568
+ 3.686804,43.409451
1569
+ 3.687136,43.409295
1570
+ 3.687479,43.409646
1571
+ 3.68661,43.410067
1572
+ 3.687201,43.410737
1573
+ 3.687394,43.410652
1574
+ 3.687587,43.410839
1575
+ 3.687211,43.411026
1576
+ 3.687544,43.411408
1577
+ 3.687748,43.411454
1578
+ 3.687898,43.411556
1579
+ 3.687994,43.411665
1580
+ 3.687887,43.411844
1581
+ 3.688155,43.411817
1582
+ 3.68852,43.411762
1583
+ 3.688901,43.4117
1584
+ 3.689126,43.411661
1585
+ 3.689448,43.411474
1586
+ 3.689518,43.411618
1587
+ 3.689904,43.411571
1588
+ 3.690012,43.411404
1589
+ 3.69029,43.41147
1590
+ 3.690687,43.411384
1591
+ 3.691117,43.411478
1592
+ 3.691771,43.411369
1593
+ 3.692232,43.41147
1594
+ 3.693091,43.411743
1595
+ 3.692704,43.412156
1596
+ 3.692758,43.412865
1597
+ 3.693048,43.413255
1598
+ 3.696095,43.413987
1599
+ 3.695741,43.414237
1600
+ 3.694711,43.414065
1601
+ 3.694614,43.414213
1602
+ 3.694528,43.414213
1603
+ 3.694571,43.414026
1604
+ 3.693391,43.413839
1605
+ 3.692833,43.414455
1606
+ 3.690902,43.414572
1607
+ 3.690269,43.414213
1608
+ 3.689378,43.414167
1609
+ 3.688681,43.414696
1610
+ 3.688198,43.414595
1611
+ 3.688005,43.414829
1612
+ 3.687726,43.414743
1613
+ 3.688027,43.414439
1614
+ 3.687426,43.414198
1615
+ 3.687072,43.41352
1616
+ 3.686428,43.413231
1617
+ 3.686213,43.413411
1618
+ 3.686138,43.413372
1619
+ 3.686235,43.413231
1620
+ 3.685516,43.413052
1621
+ 3.684754,43.413816
1622
+ 3.684347,43.414057
1623
+ 3.683639,43.414151
1624
+ 3.682855,43.414042
1625
+ 3.683231,43.413379
1626
+ 3.684443,43.413021</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1627
+ <Placemark id="oaH2G">
1628
+ <name>Bureau 31</name><ExtendedData>
1629
+ <Data name="_umap_options"><value>{"color":"BlueViolet"}</value></Data></ExtendedData>
1630
+ <Polygon>
1631
+ <outerBoundaryIs>
1632
+ <LinearRing><coordinates>3.648233,43.397627
1633
+ 3.649971,43.398546
1634
+ 3.654606,43.400402
1635
+ 3.656645,43.402023
1636
+ 3.656752,43.40235
1637
+ 3.654971,43.404143
1638
+ 3.659145,43.400472
1639
+ 3.659027,43.399942
1640
+ 3.659638,43.398811
1641
+ 3.660754,43.399069
1642
+ 3.66202,43.398001
1643
+ 3.661805,43.397829
1644
+ 3.661602,43.397907
1645
+ 3.660979,43.397829
1646
+ 3.661333,43.397315
1647
+ 3.660979,43.396816
1648
+ 3.659091,43.395865
1649
+ 3.659177,43.395787
1650
+ 3.658254,43.395319
1651
+ 3.658447,43.394836
1652
+ 3.659155,43.395194
1653
+ 3.660336,43.393931
1654
+ 3.659477,43.393666
1655
+ 3.659756,43.393354
1656
+ 3.659155,43.39312
1657
+ 3.659327,43.392637
1658
+ 3.660979,43.393261
1659
+ 3.662868,43.392544
1660
+ 3.662782,43.392138
1661
+ 3.661816,43.391795
1662
+ 3.659906,43.391296
1663
+ 3.659542,43.390205
1664
+ 3.657911,43.38955
1665
+ 3.655765,43.389643
1666
+ 3.653941,43.389565
1667
+ 3.65422,43.389175
1668
+ 3.653812,43.389394
1669
+ 3.648856,43.38707
1670
+ 3.643341,43.392045
1671
+ 3.648233,43.397627</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1672
+ <Placemark id="h0wD5">
1673
+ <name>Bureau 32</name><ExtendedData></ExtendedData>
1674
+ <Polygon>
1675
+ <outerBoundaryIs>
1676
+ <LinearRing><coordinates>3.671579,43.412951
1677
+ 3.671987,43.412592
1678
+ 3.671708,43.412358
1679
+ 3.672309,43.411938
1680
+ 3.671515,43.41108
1681
+ 3.6707,43.411688
1682
+ 3.670399,43.411439
1683
+ 3.669444,43.412117
1684
+ 3.668844,43.41253
1685
+ 3.668329,43.412312
1686
+ 3.669155,43.411797
1687
+ 3.668575,43.411143
1688
+ 3.668221,43.411228
1689
+ 3.667835,43.410971
1690
+ 3.66762,43.41108
1691
+ 3.667427,43.410979
1692
+ 3.667223,43.411088
1693
+ 3.666848,43.411018
1694
+ 3.666751,43.410659
1695
+ 3.667191,43.410379
1696
+ 3.667052,43.410184
1697
+ 3.666537,43.40981
1698
+ 3.666204,43.409802
1699
+ 3.665882,43.409537
1700
+ 3.665839,43.409264
1701
+ 3.665335,43.408836
1702
+ 3.665646,43.408641
1703
+ 3.665957,43.408914
1704
+ 3.667084,43.408204
1705
+ 3.667556,43.408555
1706
+ 3.668103,43.40822
1707
+ 3.66865,43.407355
1708
+ 3.672835,43.408875
1709
+ 3.675238,43.409716
1710
+ 3.677652,43.41027
1711
+ 3.677566,43.410558
1712
+ 3.677094,43.410566
1713
+ 3.67615,43.411127
1714
+ 3.67557,43.410768
1715
+ 3.674841,43.41108
1716
+ 3.675377,43.411751
1717
+ 3.672287,43.413543
1718
+ 3.671579,43.412951</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1719
+ <Placemark id="ydMJC">
1720
+ <name>Bureau 33</name><ExtendedData></ExtendedData>
1721
+ <Polygon>
1722
+ <outerBoundaryIs>
1723
+ <LinearRing><coordinates>3.678371,43.410519
1724
+ 3.678435,43.410652
1725
+ 3.678875,43.410792
1726
+ 3.679004,43.411119
1727
+ 3.678864,43.411244
1728
+ 3.679013,43.411298
1729
+ 3.678887,43.411465
1730
+ 3.679237,43.411591
1731
+ 3.679047,43.412125
1732
+ 3.678778,43.412008
1733
+ 3.677942,43.412623
1734
+ 3.677802,43.412545
1735
+ 3.677309,43.413083
1736
+ 3.677545,43.41327
1737
+ 3.677298,43.41359
1738
+ 3.67703,43.413481
1739
+ 3.676718,43.413839
1740
+ 3.677405,43.414361
1741
+ 3.6766,43.41447
1742
+ 3.675431,43.414447
1743
+ 3.674852,43.414486
1744
+ 3.673768,43.414408
1745
+ 3.671569,43.41405
1746
+ 3.672266,43.413582
1747
+ 3.675452,43.411727
1748
+ 3.67483,43.411104
1749
+ 3.67557,43.410761
1750
+ 3.67601,43.411057
1751
+ 3.676139,43.411135
1752
+ 3.677083,43.410589
1753
+ 3.677545,43.410558
1754
+ 3.677673,43.410301
1755
+ 3.676279,43.409958
1756
+ 3.676622,43.408532
1757
+ 3.678049,43.407464
1758
+ 3.678424,43.407643
1759
+ 3.679143,43.407721
1760
+ 3.678854,43.408508
1761
+ 3.678575,43.409716
1762
+ 3.678371,43.410519</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark></Document></kml>
data/interim/elections_long.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70fc51d5dd8303c51339a95f818198ba0cc5f26e2a3dc951eae664eb8953a54d
3
+ size 2216814
data/mapping_candidats_blocs.csv ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ code_candidature;nom_candidature;bloc_1;bloc_2;bloc_3
2
+ NC;Nuance non communiquee;centre;;
3
+ LDIV;Divers;centre;;
4
+ DIV;Divers;centre;;
5
+ LDVD;Divers droite;droite_modere;droite_dure
6
+ LDVG;Divers gauche;gauche_modere;
7
+ LUG;Union de la gauche;;gauche_modere
8
+ LUD;Union de la droite;droite;droite_modere
9
+ LFN;Front national;extreme_droite;;
10
+ LEXG;Extreme gauche;extreme_gauche;;
11
+ LSOC;Parti socialiste;gauche_modere;
12
+ LUMP;Union pour un mouvement populaire;droite_modere;
13
+ LNC;Nouveau centre;centre;;
14
+ LFG;Front de gauche;gauche_dure;
15
+ LVEC;Europe Ecologie Les Verts;gauche_modere;;
16
+ LUDI;Union des democrates et independants;centre;droite_modere
17
+ LDVC;Divers centre;centre;;
18
+ LCOM;Parti communiste;gauche_dure;
19
+ LRN;Rassemblement national;extreme_droite;;
20
+ LUC;Union du centre;centre;;
21
+ LPG;Parti de gauche;gauche_dure;
22
+ LMDM;Mouvement democrate;centre;;
23
+ LLR;Les republicains;droite_modere;
24
+ LEXD;Extreme droite;extreme_droite;;
25
+ LREM;La republique en marche;centre;droite_modere
26
+ LFI;La France insoumise;gauche_dure;;
27
+ LECO;Ecologistes;gauche_modere;;
28
+ LREG;Regionalistes;centre;;
29
+ LGJ;Gilets jaunes;;
30
+ LRDG;Radicaux de gauche;gauche_modere;centre
31
+ LDLF;Debout la France;droite_dure;
32
+ RN;Rassemblement national;extreme_droite;;
33
+ LR;Les republicains;droite_modere;centre
34
+ EELV;Europe Ecologie Les Verts;gauche_modere;;
35
+ PS;Parti socialiste;gauche_modere;;
36
+ UDI;Union des democrates et independants;centre;droite_modere
37
+ PRG;Parti radical de gauche;gauche_modere;centre
38
+ DVD;Divers droite;droite_modere;droite_dure
39
+ DVG;Divers gauche;gauche_modere;
40
+ EXD;Extreme droite;extreme_droite;;
41
+ EXG;Extreme gauche;extreme_gauche;;
42
+ FN;Front national;extreme_droite;;
43
+ DLF;Debout la France;droite_dure;
44
+ REM;La republique en marche;centre;droite_modere
45
+ ENS;Ensemble;centre;droite_modere
46
+ LENS;Ensemble;centre;droite_modere
47
+ REC;Reconquete;extreme_droite;;
48
+ LREC;Reconquete;extreme_droite;;
49
+ DSV;Divers souverainiste;droite_dure;
50
+ LDSV;Divers souverainiste;droite_dure;
51
+ LUGE;Union de la gauche elargie;gauche_modere;
52
+ COM;Parti communiste;gauche_dure;
53
+ SOC;Parti socialiste;gauche_modere;;
54
+ FI;La France insoumise;gauche_dure;;
55
+ ECO;Ecologistes;gauche_modere;;
56
+ DXG;Divers extreme gauche;extreme_gauche;;
57
+ NUP;Nupes;gauche_dure;gauche_modere
58
+ BC-COM;Binome communiste;gauche_dure;
59
+ BC-DVD;Binome divers droite;droite_modere;droite_dure
60
+ BC-ECO;Binome ecologiste;gauche_modere;;
61
+ BC-RN;Binome rassemblement national;extreme_droite;;
data/mappings/category_mapping.csv ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ code_candidature;nom_candidature;bloc_1;bloc_2;bloc_3
2
+ NC;Nuance non communiquee;centre;;
3
+ LDIV;Divers;centre;;
4
+ DIV;Divers;centre;;
5
+ LDVD;Divers droite;droite_modere;droite_dure
6
+ LDVG;Divers gauche;gauche_modere;gauche
7
+ LUG;Union de la gauche;gauche;gauche_modere
8
+ LUD;Union de la droite;droite;droite_modere
9
+ LFN;Front national;extreme_droite;;
10
+ LEXG;Extreme gauche;extreme_gauche;;
11
+ LSOC;Parti socialiste;gauche_modere;gauche
12
+ LUMP;Union pour un mouvement populaire;droite_modere;droite
13
+ LNC;Nouveau centre;centre;;
14
+ LFG;Front de gauche;gauche_dure;gauche
15
+ LVEC;Europe Ecologie Les Verts;gauche_modere;;
16
+ LUDI;Union des democrates et independants;centre;droite_modere
17
+ LDVC;Divers centre;centre;;
18
+ LCOM;Parti communiste;gauche_dure;gauche
19
+ LRN;Rassemblement national;extreme_droite;;
20
+ LUC;Union du centre;centre;;
21
+ LPG;Parti de gauche;gauche_dure;gauche
22
+ LMDM;Mouvement democrate;centre;;
23
+ LLR;Les republicains;droite_modere;droite
24
+ LEXD;Extreme droite;extreme_droite;;
25
+ LREM;La republique en marche;centre;droite_modere
26
+ LFI;La France insoumise;gauche_dure;;
27
+ LECO;Ecologistes;gauche_modere;;
28
+ LREG;Regionalistes;centre;;
29
+ LGJ;Gilets jaunes;gauche;droite
30
+ LRDG;Radicaux de gauche;gauche_modere;centre
31
+ LDLF;Debout la France;droite_dure;droite
32
+ RN;Rassemblement national;extreme_droite;;
33
+ LR;Les republicains;droite_modere;centre
34
+ EELV;Europe Ecologie Les Verts;gauche_modere;;
35
+ PS;Parti socialiste;gauche_modere;;
36
+ UDI;Union des democrates et independants;centre;droite_modere
37
+ PRG;Parti radical de gauche;gauche_modere;centre
38
+ DVD;Divers droite;droite_modere;droite_dure
39
+ DVG;Divers gauche;gauche_modere;gauche
docker-compose.yml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: "3.9"
2
+
3
+ services:
4
+ postgres:
5
+ image: postgres:16
6
+ container_name: elections_postgres
7
+ restart: unless-stopped
8
+ env_file: .env
9
+ environment:
10
+ - POSTGRES_USER=${POSTGRES_USER}
11
+ - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
12
+ - POSTGRES_DB=${POSTGRES_DB}
13
+ ports:
14
+ - "${POSTGRES_PORT:-5432}:5432"
15
+ volumes:
16
+ - pgdata:/var/lib/postgresql/data
17
+
18
+ pgadmin:
19
+ image: dpage/pgadmin4:8
20
+ container_name: elections_pgadmin
21
+ restart: unless-stopped
22
+ depends_on:
23
+ - postgres
24
+ env_file: .env
25
+ environment:
26
+ PGADMIN_DEFAULT_EMAIL: admin@sete.fr
27
+ PGADMIN_DEFAULT_PASSWORD: admin
28
+ PGADMIN_LISTEN_PORT: 8080
29
+ ports:
30
+ - "8080:8080"
31
+ volumes:
32
+ - pgadmin_data:/var/lib/pgadmin
33
+ profiles:
34
+ - admin
35
+
36
+ volumes:
37
+ pgdata:
38
+ pgadmin_data:
harmoniser.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % Vot/Ins -> % Votants
2
+ Code du d°partement -> Code du département
3
+
4
+ Exprim°s -> Exprim°s 1 -> Exprimés
5
+ Libell° Abr°g° Liste 1, Libellé abrégé de liste 1 -> Libellé Abrégé Liste 1
6
+
7
+ Libell° Etendu Liste 1, Liste, Libellé de liste 1, Liste.1 -> Libellé Etendu Liste 1
8
+
9
+ Libell° de la circonscription, Libellé de la circonscription
10
+
11
+ Libell° de la commune, Libellé commune -> Libellé de la commune
12
+
13
+ Libell° du d°partement, Libellé département -> Libellé du département
14
+
15
+ Nom candidat 1, Nom Tête de Liste 1, Nom T°te de Liste 1, Nom.1 -> Nom 1
16
+
17
+ Pr°nom du candidat 1, Pr°nom du candidat t°te de liste, Pr°nom.1 -> Prénom 1
18
+
19
+ N°Panneau 1, N.Pan. 1 -> N°Panneau 1
main.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import subprocess
5
+ import sys
6
+ from pathlib import Path
7
+
8
+
9
+ PROJECT_ROOT = Path(__file__).resolve().parent
10
+ PYTHON = sys.executable
11
+
12
+
13
def run_step(cmd: list[str], desc: str) -> None:
    """Run one pipeline step as a subprocess, aborting the pipeline on failure.

    Prints a banner with *desc*, executes *cmd*, and raises SystemExit with a
    descriptive French message when the command exits with a non-zero status.
    """
    print(f"\n=== {desc} ===")
    # check=False: we want our own error message instead of CalledProcessError.
    returncode = subprocess.run(cmd, check=False).returncode
    if returncode == 0:
        return
    raise SystemExit(f"Echec de l'étape '{desc}' (code {returncode}). Commande: {' '.join(cmd)}")
18
+
19
+
20
def main() -> None:
    """Command-line entry point: chain preprocess -> features -> train -> predict.

    Each stage can be skipped individually via its --skip-* flag; any stage
    failure aborts the whole pipeline (see run_step).
    """
    parser = argparse.ArgumentParser(
        description="Pipeline orchestration: preprocess -> features -> train -> predict",
    )
    # Input / configuration options.
    parser.add_argument("--raw-dir", type=Path, default=Path("data/raw"), help="Répertoire des fichiers bruts.")
    parser.add_argument("--mapping", type=Path, default=Path("config/nuances.yaml"), help="Mapping nuances->catégories.")
    parser.add_argument("--target-election", type=str, default="municipales", help="Election cible (ex: municipales).")
    parser.add_argument("--target-year", type=int, default=2026, help="Année cible.")
    parser.add_argument("--commune-code", type=str, default="301", help="Code commune pour la prédiction (Sète=301).")
    # Stage toggles.
    parser.add_argument("--skip-preprocess", action="store_true", help="Ne pas relancer le prétraitement.")
    parser.add_argument("--skip-features", action="store_true", help="Ne pas reconstruire le panel.")
    parser.add_argument("--skip-train", action="store_true", help="Ne pas réentraîner le modèle.")
    parser.add_argument("--skip-predict", action="store_true", help="Ne pas générer les prédictions CSV.")
    args = parser.parse_args()

    # Canonical artefact locations shared between stages.
    interim_path = PROJECT_ROOT / "data" / "interim" / "elections_long.parquet"
    panel_path = PROJECT_ROOT / "data" / "processed" / "panel.parquet"
    model_path = PROJECT_ROOT / "models" / "hist_gradient_boosting.joblib"

    if not args.skip_preprocess:
        preprocess_cmd = [
            PYTHON, "-m", "src.data.preprocess",
            "--raw-dir", str(args.raw_dir),
            "--output-dir", str(PROJECT_ROOT / "data" / "interim"),
        ]
        run_step(preprocess_cmd, "Prétraitement (format long)")

    if not args.skip_features:
        features_cmd = [
            PYTHON, "-m", "src.features.build_features",
            "--elections-long", str(interim_path),
            "--mapping", str(args.mapping),
            "--output", str(panel_path),
            "--output-csv", str(PROJECT_ROOT / "data" / "processed" / "panel.csv"),
        ]
        run_step(features_cmd, "Construction du panel features+cibles")

    if not args.skip_train:
        train_cmd = [
            PYTHON, "-m", "src.model.train",
            "--panel", str(panel_path),
            "--reports-dir", str(PROJECT_ROOT / "reports"),
            "--models-dir", str(PROJECT_ROOT / "models"),
        ]
        run_step(train_cmd, "Entraînement / évaluation des modèles")

    if not args.skip_predict:
        predict_cmd = [
            PYTHON, "-m", "src.model.predict",
            "--model-path", str(model_path),
            "--feature-columns", str(PROJECT_ROOT / "models" / "feature_columns.json"),
            "--elections-long", str(interim_path),
            "--mapping", str(args.mapping),
            "--target-election-type", args.target_election,
            "--target-year", str(args.target_year),
            "--commune-code", args.commune_code,
            "--output-dir", str(PROJECT_ROOT / "predictions"),
        ]
        run_step(predict_cmd, "Génération des prédictions CSV")

    print("\nPipeline terminé. Lance Gradio avec `python -m app.gradio_app`.")
114
+
115
+
116
# Allow running the orchestrator directly: `python main.py [options]`.
if __name__ == "__main__":
    main()
mission.md ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Mission
2
+
3
+ ## Étape 1
4
+
5
+ Nous créons un pipeline qui consiste à prendre en entrée des dataframes au format csv et qui les intègre dans une base de données.
6
+
7
+ La base de données comprend toujours la liste des bureaux de vote de toute la France et tout nouveau dataframe rajouterait des colonnes.
8
+
9
+ Dans un premier temps, on s'assure que le fichier soit importé et normalisé pour être conforme à la base de données, afin que la fusion puisse se dérouler correctement.
10
+
11
+ Dans un second temps le dataset est fusionné.
12
+
13
+ ## Ancien
14
+
15
+ Tu es OpenAI Codex dans VS Code. Tu travailles dans un repo Python existant contenant des notebooks et des données dans data/raw, data/interim, data/processed. Objectif métier : au cabinet du maire de Sète, construire un outil prédictif des prochaines municipales (ex: 2026) bureau de vote par bureau de vote, basé sur l’historique électoral et une comparaison au national, puis exposer le tout via une application Gradio. Le projet doit rester opérant à long terme pour les échéances futures (pas “codé en dur” uniquement pour 2026).
16
+
17
+ Contexte fonctionnel (à respecter strictement)
18
+
19
+ Commune principale : Sète (outil centré sur Sète). Prévoir configuration pour étendre à d’autres communes ultérieurement (sans casser l’architecture).
20
+
21
+ L’utilisateur de Gradio choisit :
22
+
23
+ un bureau de vote
24
+
25
+ une élection cible à observer (par défaut : municipales 2026, mais l’UI et le backend doivent accepter n’importe quel couple (type, année) présent / futur)
26
+
27
+ Gradio renvoie :
28
+
29
+ le score prédit (%) pour chaque catégorie de candidats
30
+
31
+ entre parenthèses à côté de chaque score, la différence (en points) vs :
32
+
33
+ la dernière élection législative avant l’élection cible (dans le contexte “municipales 2026”, c’est typiquement les législatives les plus récentes avant 2026)
34
+
35
+ les municipales 2020
36
+
37
+ Catégories à utiliser (cibles et affichage) :
38
+
39
+ centre
40
+
41
+ gauche_modere
42
+
43
+ droite_modere
44
+
45
+ gauche_dure
46
+
47
+ droite_dure
48
+
49
+ extreme_gauche
50
+
51
+ extreme_droite
52
+
53
+ Données & notebooks existants
54
+
55
+ Les fichiers 01_pretraitement et 02_feature_engineering existent (notebooks dans notebooks/) et ont déjà fait un premier nettoyage / feature engineering.
56
+
57
+ Étape 1 : vérifier que ces notebooks sont cohérents avec l’objectif final (prédire municipales 2026 + long terme + bureau par bureau + comparaisons national/local), puis industrialiser : extraire la logique dans des modules Python versionnés sous src/.
58
+
59
+ Les datasets bruts sont dans data/raw. data/interim et data/processed sont disponibles et doivent être utilisés si pertinents (ne pas refaire inutilement ce qui existe déjà, mais corriger si c’est incohérent).
60
+
61
+ Exigences méthodologiques non négociables
62
+ 1) Anti-fuite temporelle (time leakage)
63
+
64
+ Pour prédire une élection cible (type, année = T), les features doivent être calculées uniquement avec des données strictement antérieures à T.
65
+
66
+ Interdiction d’utiliser des résultats de l’élection cible dans les features.
67
+
68
+ Les “écarts au national” doivent être calculés uniquement pour des élections antérieures, avec le score national correspondant à ces élections antérieures.
69
+
70
+ La validation doit respecter la causalité (split temporel).
71
+
72
+ 2) Structure des données adaptée (panel)
73
+
74
+ Ne pas rester sur “1 ligne = 1 bureau” wide naïf si cela empêche l’apprentissage.
75
+ Implémenter un dataset panel conceptuellement : 1 ligne = (bureau, election_type, election_year) avec :
76
+
77
+ cibles : parts de voix (%) par catégorie
78
+
79
+ features : historiques laggés, écarts national antérieurs, participation antérieure, etc.
80
+
81
+ 3) Contraintes de sortie
82
+
83
+ Les prédictions sont des % par catégorie :
84
+
85
+ clip à [0, 100]
86
+
87
+ renormaliser pour sommer à 100 (gérer somme=0)
88
+ Alternative bonus : modéliser via log-ratios + softmax, mais renormalisation simple acceptable.
89
+
90
+ Étape 1 — Audit & industrialisation des notebooks
91
+
92
+ Lire et analyser notebooks/01_pretraitement.* et notebooks/02_feature_engineering.*.
93
+
94
+ Produire un diagnostic succinct (dans reports/notebook_audit.md) :
95
+
96
+ quelles tables/colonnes sont produites ?
97
+
98
+ est-ce compatible avec “bureau×élection” ?
99
+
100
+ existe-t-il des risques de leakage ?
101
+
102
+ est-ce centré sur Sète ou multi-communes ?
103
+
104
+ Refactorer en code production :
105
+
106
+ src/data/preprocess.py : chargement, nettoyage, normalisation des identifiants (commune, bureau), harmonisation des colonnes, gestion des tours (si présents).
107
+
108
+ src/features/build_features.py : construction des features “safe” et panel dataset.
109
+
110
+ Scripts CLI : python -m src.data.preprocess ..., python -m src.features.build_features ...
111
+
112
+ Générer (ou régénérer si nécessaire) un dataset final standard :
113
+
114
+ data/processed/panel.parquet
115
+
116
+ et un dictionnaire de données data/processed/data_dictionary.md
117
+
118
+ Étape 2 — Base PostgreSQL pour l’historique (utilisée par Gradio)
119
+
120
+ Construire une base PostgreSQL (docker-compose recommandé) qui stocke l’historique complet et permet de requêter rapidement par bureau.
121
+
122
+ 2.1 Livrables techniques DB
123
+
124
+ docker-compose.yml lançant Postgres + un outil admin optionnel (pgAdmin facultatif).
125
+
126
+ .env.example pour config DB (host, port, user, password, dbname).
127
+
128
+ Schéma SQL (via Alembic OU SQLAlchemy create_all) versionné dans src/db/.
129
+
130
+ 2.2 Modèle de données (proposition minimale à implémenter)
131
+
132
+ Tables conseillées (adapter si nécessaire, mais rester normalisé) :
133
+
134
+ communes : id, name_normalized, insee_code (si dispo)
135
+
136
+ bureaux : id, commune_id, bureau_code, bureau_label (si dispo), UNIQUE(commune_id, bureau_code)
137
+
138
+ elections : id, election_type, election_year, round (nullable), date (nullable), UNIQUE(type, year, round)
139
+
140
+ categories : id, name (les 7 catégories)
141
+
142
+ results_local : id, bureau_id, election_id, category_id, share_pct, votes (nullable), expressed (nullable), turnout_pct (nullable)
143
+
144
+ results_national : id, election_id, category_id, share_pct, votes (nullable), expressed (nullable), turnout_pct (nullable)
145
+
146
+ 2.3 Ingestion / ETL vers Postgres
147
+
148
+ Créer src/db/ingest.py :
149
+
150
+ lit les données depuis data/processed (préféré) sinon reconstruit depuis data/raw via preprocess + features.
151
+
152
+ insère/upsère idempotent :
153
+
154
+ communes, bureaux, elections, categories
155
+
156
+ résultats locaux et nationaux
157
+
158
+ logs clairs + contrôles de cohérence (ex: somme des parts ≈ 100, votes ≤ exprimés, etc.)
159
+
160
+ script CLI : python -m src.db.ingest --input data/processed/panel.parquet
161
+
162
+ Étape 3 — Modélisation & prédiction
163
+
164
+ Construire un entraînement robuste + stockage des artefacts + prédiction par bureau.
165
+
166
+ 3.1 Cibles
167
+
168
+ Multi-sorties : target_share_<categorie> pour les 7 catégories.
169
+
170
+ 3.2 Features attendues (au minimum)
171
+
172
+ Pour une ligne (bureau, type, year=T) :
173
+
174
+ historiques laggés par catégorie (antérieurs à T)
175
+
176
+ prev_share_<cat>_any_lag1
177
+
178
+ prev_share_<cat>_<type>_lag1 (si existant)
179
+
180
+ écarts au national sur historiques :
181
+
182
+ prev_dev_to_national_<cat>_any_lag1 = prev_share_bureau - prev_share_national (sur l’élection antérieure utilisée)
183
+
184
+ ou par type si disponible
185
+
186
+ participation / abstention historiques si dispos :
187
+
188
+ prev_turnout_any_lag1, etc.
189
+
190
+ variables “swing” :
191
+
192
+ swing_<cat> = prev_share_lag1 - prev_share_lag2 (si lag2 existe)
193
+
194
+ Toutes ces features doivent être calculées sans fuite (join-asof temporel ou logique équivalente).
195
+
196
+ 3.3 Split & évaluation (obligatoire)
197
+
198
+ Interdiction de random split.
199
+
200
+ Implémenter une évaluation temporelle paramétrable, ex :
201
+
202
+ train <= 2017, valid 2019–2021, test >= 2022 (exemple : configurable)
203
+
204
+ Métriques :
205
+
206
+ MAE moyenne sur les 7 catégories
207
+
208
+ MAE par catégorie
209
+
210
+ option : erreur sur “catégorie gagnante”
211
+
212
+ Générer :
213
+
214
+ reports/metrics.json
215
+
216
+ reports/metrics.md
217
+
218
+ quelques figures (matplotlib) dans reports/figures/
219
+
220
+ 3.4 Modèles à entraîner
221
+
222
+ Implémenter au moins :
223
+
224
+ Ridge (baseline interprétable) avec standardisation
225
+
226
+ HistGradientBoostingRegressor (via MultiOutputRegressor si nécessaire)
227
+
228
+ LightGBM / XGBoost / CatBoost si installés (détection automatique, sinon skip proprement)
229
+
230
+ Sauvegarder modèles et preprocessors dans models/ (joblib), avec un model_card.md (date, données, split, features, métriques).
231
+
232
+ 3.5 Prédiction pour une élection cible
233
+
234
+ Créer src/model/predict.py :
235
+
236
+ arguments : --target-election-type, --target-year, --commune (par défaut Sète)
237
+
238
+ produit un CSV :
239
+
240
+ predictions/pred_<type>_<year>_sete.csv
241
+
242
+ colonnes : commune, bureau_code, predicted_share_<categorie> (7 colonnes), + comparateurs (voir ci-dessous)
243
+
244
+ Comparateurs à afficher dans Gradio
245
+
246
+ Pour chaque catégorie, calculer 2 deltas (points de %):
247
+
248
+ vs la dernière législative avant l’élection cible
249
+
250
+ trouver dans la DB l’élection election_type='legislatives' avec année max < target_year (et même round logique si géré)
251
+
252
+ récupérer le share_pct du bureau sur cette législative (par catégorie)
253
+
254
+ delta_leg = predicted_share - share_leg
255
+
256
+ vs les municipales 2020
257
+
258
+ si target_year != 2020 : récupérer election_type='municipales' et election_year=2020 pour ce bureau
259
+
260
+ delta_mun2020 = predicted_share - share_mun2020
261
+ Si une référence manque (bureau absent, données manquantes), afficher “N/A” au lieu du delta.
262
+
263
+ Étape 4 — Application Gradio
264
+
265
+ Créer une app Gradio production-ready dans app/gradio_app.py.
266
+
267
+ 4.1 UI
268
+
269
+ Titre : “Prévision Municipales — Ville de Sète”
270
+
271
+ Inputs :
272
+
273
+ Dropdown bureau : liste des bureaux disponibles pour Sète (requête DB)
274
+
275
+ Dropdown election : couples (type, année) cibles (par défaut municipale 2026, mais liste configurable). Si 2026 n’existe pas en DB, elle doit pouvoir être sélectionnée quand même comme “cible future”.
276
+
277
+ Bouton : “Prédire”
278
+
279
+ 4.2 Sorties
280
+
281
+ Afficher :
282
+
283
+ Un tableau (pandas dataframe ou composant gradio) avec 7 lignes (catégories) :
284
+
285
+ categorie
286
+
287
+ score_predit_%
288
+
289
+ Δ vs législatives (dernières) (en points)
290
+
291
+ Δ vs municipales 2020 (en points)
292
+
293
+ Option bonus : un bar chart matplotlib des scores prédits par catégorie (simple, lisible).
294
+
295
+ Format texte exigé (si rendu texte au lieu de tableau) :
296
+
297
+ centre : 21.3% (+1.2 vs législatives, -0.8 vs mun 2020)
298
+
299
+ et ainsi de suite
300
+ Avec N/A si delta indisponible.
301
+
302
+ 4.3 Backend
303
+
304
+ L’app ne doit pas recalculer tout le dataset à chaque clic.
305
+
306
+ Au démarrage :
307
+
308
+ se connecte à Postgres
309
+
310
+ charge le modèle entraîné + preprocessor
311
+
312
+ Lors d’une prédiction :
313
+
314
+ récupère les features “safe” du bureau pour la cible (type, année) :
315
+
316
+ soit via une table features pré-calculées,
317
+
318
+ soit en construisant “à la volée” depuis l’historique DB (mais de manière efficace et sans fuite)
319
+
320
+ applique modèle → prédictions → post-traitement (clip + renormalisation)
321
+
322
+ calcule deltas vs références (législatives max<target_year, municipales 2020)
323
+
324
+ renvoie la table + graph
325
+
326
+ Architecture attendue du repo
327
+
328
+ Créer / compléter l’arborescence :
329
+
330
+ src/
331
+
332
+ data/
333
+
334
+ features/
335
+
336
+ db/
337
+
338
+ model/
339
+
340
+ utils/
341
+
342
+ app/
343
+
344
+ gradio_app.py
345
+
346
+ data/raw/ (existant)
347
+
348
+ data/interim/ (existant)
349
+
350
+ data/processed/ (existant)
351
+
352
+ models/
353
+
354
+ predictions/
355
+
356
+ reports/
357
+
358
+ notebooks/ (existant)
359
+
360
+ Inclure :
361
+
362
+ README.md très clair avec commandes :
363
+
364
+ (a) preprocess/build_features
365
+
366
+ (b) lancer Postgres
367
+
368
+ (c) ingest DB
369
+
370
+ (d) train/evaluate
371
+
372
+ (e) lancer Gradio
373
+
374
+ requirements.txt ou pyproject.toml
375
+
376
+ logs (INFO) + messages d’erreur actionnables (ex : DB down, modèle absent, fichiers manquants)
377
+
378
+ code robuste si data/raw est vide : doit expliquer quels fichiers déposer et comment les nommer.
379
+
380
+ Points d’attention “réels”
381
+
382
+ gérer bureaux absents certaines années → imputation + deltas N/A
383
+
384
+ gérer harmonisation des libellés bureau → normalisation + warning
385
+
386
+ gérer tours (T1/T2) : inclure colonne round ou config, et éviter mélange non intentionnel
387
+
388
+ le mapping “candidat/nuance -> catégorie” est critique :
389
+
390
+ prévoir data/mappings/category_mapping.csv (ou YAML) et documenter la logique
391
+
392
+ tout non-mappé -> autres puis redistribuer/ignorer selon règle explicite (mais comme les catégories sont imposées, définir une stratégie : soit exclure “autres” du modèle, soit le répartir, soit le conserver et renormaliser sur 7 catégories — choisir une approche et la documenter)
393
+
394
+ Livrables finaux attendus
395
+
396
+ Code complet (modules + scripts CLI)
397
+
398
+ Schéma DB + docker-compose + script ingestion
399
+
400
+ Pipeline entraînement/évaluation + artefacts modèles
401
+
402
+ Application Gradio fonctionnelle
403
+
404
+ Exemples de fichiers mapping :
405
+
406
+ data/mappings/category_mapping.csv
407
+
408
+ Documentation complète dans README
409
+
410
+ Ne pas inventer de données. Travailler avec l’existant (data/interim, data/processed, notebooks), corriger si incohérent, et rendre l’ensemble production-ready (reproductible, configurable, sans fuite temporelle).
models/best_model.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "name": "hist_gradient_boosting"
3
+ }
models/feature_columns.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "prev_share_any_lag1_centre",
3
+ "prev_share_any_lag1_droite_dure",
4
+ "prev_share_any_lag1_droite_modere",
5
+ "prev_share_any_lag1_extreme_droite",
6
+ "prev_share_any_lag1_extreme_gauche",
7
+ "prev_share_any_lag1_gauche_dure",
8
+ "prev_share_any_lag1_gauche_modere",
9
+ "prev_share_type_lag1_centre",
10
+ "prev_share_type_lag1_droite_dure",
11
+ "prev_share_type_lag1_droite_modere",
12
+ "prev_share_type_lag1_extreme_droite",
13
+ "prev_share_type_lag1_extreme_gauche",
14
+ "prev_share_type_lag1_gauche_dure",
15
+ "prev_share_type_lag1_gauche_modere",
16
+ "prev_dev_to_national_any_lag1_centre",
17
+ "prev_dev_to_national_any_lag1_droite_dure",
18
+ "prev_dev_to_national_any_lag1_droite_modere",
19
+ "prev_dev_to_national_any_lag1_extreme_droite",
20
+ "prev_dev_to_national_any_lag1_extreme_gauche",
21
+ "prev_dev_to_national_any_lag1_gauche_dure",
22
+ "prev_dev_to_national_any_lag1_gauche_modere",
23
+ "prev_dev_to_national_type_lag1_centre",
24
+ "prev_dev_to_national_type_lag1_droite_dure",
25
+ "prev_dev_to_national_type_lag1_droite_modere",
26
+ "prev_dev_to_national_type_lag1_extreme_droite",
27
+ "prev_dev_to_national_type_lag1_extreme_gauche",
28
+ "prev_dev_to_national_type_lag1_gauche_dure",
29
+ "prev_dev_to_national_type_lag1_gauche_modere",
30
+ "swing_any_centre",
31
+ "swing_any_droite_dure",
32
+ "swing_any_droite_modere",
33
+ "swing_any_extreme_droite",
34
+ "swing_any_extreme_gauche",
35
+ "swing_any_gauche_dure",
36
+ "swing_any_gauche_modere",
37
+ "turnout_pct",
38
+ "prev_turnout_any_lag1",
39
+ "prev_turnout_same_type_lag1"
40
+ ]
models/hist_gradient_boosting.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91189f0a1fa5876b60b75e54293f093023d12f1f32ee5e3076aa648659bf7afd
3
+ size 2676501
models/model_card.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Model card
2
+ - Modèle: hist_gradient_boosting
3
+ - Split temporel: train<= 2019, valid<= 2021, test>= 2022
4
+ - Features: 38 colonnes numériques (lags, écarts national, swing, turnout)
5
+ - Cibles: parts par bloc (7 catégories) renormalisées.
6
+ - Métriques principales (MAE moyen, jeux valid/test):
7
+ - Valid: 0.1233
8
+ - Test: 0.1146
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas>=2.2.0
2
+ numpy>=1.26.0
3
+ sqlalchemy>=2.0.0
4
+ psycopg2-binary>=2.9.9
5
+ gradio>=4.0.0
6
+ pyarrow>=15.0.0
7
+ scikit-learn>=1.4.0
8
+ # Modèles gradient boosting / multi-output recommandés pour la prédiction bureau de vote
9
+ lightgbm>=4.3.0
10
+ xgboost>=2.0.0
11
+ catboost>=1.2.5
12
+ shap>=0.45.0
13
+ pyyaml>=6.0.0
14
+ matplotlib>=3.8.0
15
+ folium>=0.16.0
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Makes src a package so notebooks can import src.data_prep
src/constants.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
# Canonical blocs/categories to surface in the app outputs (7 cibles)
# NOTE(review): feature/target column names elsewhere are suffixed with these
# labels (e.g. prev_share_any_lag1_centre) — they must match exactly. The list
# order presumably drives display/column order; confirm before reordering.
CANDIDATE_CATEGORIES = [
    "centre",
    "gauche_modere",
    "droite_modere",
    "gauche_dure",
    "droite_dure",
    "extreme_gauche",
    "extreme_droite",
]

# Numeric columns used across the pipeline and DB ingestion
# (raw counts, derived shares/rates, lagged features, and a centred year).
NUMERIC_COLUMNS = [
    "voix_bloc",
    "exprimes",
    "inscrits",
    "votants",
    "blancs",
    "nuls",
    "part_bloc",
    "part_bloc_national",
    "taux_participation_national",
    "taux_participation_bv",
    "taux_blancs_bv",
    "taux_nuls_bv",
    "ecart_bloc_vs_national",
    "ecart_participation_vs_nat",
    "croissance_inscrits_depuis_base",
    # lagged (previous-election) variants — built without temporal leakage
    "part_bloc_lag1",
    "ecart_bloc_vs_national_lag1",
    "taux_participation_bv_lag1",
    "annee_centre",  # assumes year centred around a reference year — TODO confirm
]
src/data/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Data subpackage: preprocessing helpers and CLI entrypoints.
3
+ """
src/data/preprocess.py ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Any, Dict, Iterable, Mapping
8
+
9
+ import pandas as pd
10
+
11
+ from src import data_prep
12
+
13
+ LOGGER = logging.getLogger(__name__)
14
+
15
+
16
# Shared fragments for the per-file meta entries below. Most ministry exports
# share the same count columns; only the candidate/nuance labels and the
# bureau-de-vote identifier columns vary between vintages.
_COUNT_RENAMES: Dict[str, str] = {
    "Inscrits": "inscrits",
    "Abstentions": "abstentions",
    "Votants": "votants",
    "Blancs": "blancs",
    "Nuls": "nuls",
    "Exprimés": "exprimes",
    "Voix": "voix",
}

# Bureau-de-vote identifier columns per export vintage.
_BV_COLS_STD = ["Code de la commune", "Code du b.vote"]
_BV_COLS_2024 = ["Code commune", "Code BV"]

# Candidate / nuance label variants.
_NUANCE_NOM = {"Nuance": "code_candidature", "Nom": "nom_candidature"}
_PR_CANDIDAT = {"Nom": "nom_candidature", "Code nuance du candidat": "code_candidature"}
_MN_2020 = {"Nom": "nom_candidature", "Liste": "nom_candidature", "Code Nuance": "code_candidature"}
_DEP_BINOME = {"Nuance": "code_candidature", "Binôme": "nom_candidature"}
_REG_LISTE = {"Nuance Liste": "code_candidature", "Libellé Abrégé Liste": "nom_candidature"}


def _meta(
    type_scrutin: str,
    date_scrutin: str,
    tour: int,
    code_bv_cols: list[str],
    extra_renames: Mapping[str, str],
    **overrides: Any,
) -> Dict[str, Any]:
    """Build one scrutin meta entry from the shared fragments.

    ``extra_renames`` is merged over the common count-column renames;
    ``overrides`` adds file-specific keys (e.g. ``sep``).
    """
    entry: Dict[str, Any] = {
        "type_scrutin": type_scrutin,
        "date_scrutin": date_scrutin,
        "tour": tour,
        "code_bv_cols": list(code_bv_cols),  # fresh copy per entry
        "rename_map": {**_COUNT_RENAMES, **extra_renames},
    }
    entry.update(overrides)
    return entry


# Embedded fallback configuration (used when config/raw_sources.yaml is absent).
# The 2014 exports keep their bespoke layouts: no blancs/nuls columns, candidate
# name split over two columns, and the tour carried in a column instead of being
# fixed per file. (A duplicated "Exprimés" key in the 14_EU map was removed.)
DEFAULT_META_CONFIG: Dict[str, Dict[str, Any]] = {
    "14_EU.csv": {
        "type_scrutin": "europeennes",
        "date_scrutin": "2014-05-25",
        "tour_column": "N° tour",
        "code_bv_cols": ["Code de la commune", "N° de bureau de vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Votants": "votants",
            "Exprimés": "exprimes",
            "Nombre de voix du candidat": "voix",
            "Voix": "voix",
            "Nom du candidat": "nom_candidature",
            "Prénom du candidat": "nom_candidature",
            "Code nuance du candidat": "code_candidature",
        },
    },
    "14_MN14_T1T2.csv": {
        "type_scrutin": "municipales",
        "date_scrutin": "2014-03-23",
        "tour_column": "N° tour",
        "code_bv_cols": ["Code commune", "N° de bureau de vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Votants": "votants",
            "Exprimés": "exprimes",
            "Nombre de voix": "voix",
            "Nom du candidat tête de liste": "nom_candidature",
            "Prénom du candidat tête de liste": "nom_candidature",
            "Code nuance de la liste": "code_candidature",
        },
    },
    "17_L_T1.csv": _meta("legislatives", "2017-06-11", 1, _BV_COLS_STD, _NUANCE_NOM),
    "17_L_T2.csv": _meta("legislatives", "2017-06-18", 2, _BV_COLS_STD, _NUANCE_NOM),
    "17_PR_T1.csv": _meta("presidentielles", "2017-04-23", 1, _BV_COLS_STD, _PR_CANDIDAT),
    "17_PR_T2.csv": _meta("presidentielles", "2017-05-07", 2, _BV_COLS_STD, _PR_CANDIDAT),
    "19_EU.csv": _meta(
        "europeennes", "2019-05-26", 1, _BV_COLS_STD,
        {"Nom Tête de Liste": "nom_candidature", "Nuance Liste": "code_candidature"},
    ),
    "20_MN_T1.csv": _meta(
        "municipales", "2020-03-15", 1, ["Code de la commune", "Code B.Vote"], _MN_2020,
        sep=";",
    ),
    "20_MN_T2.csv": _meta("municipales", "2020-06-28", 2, ["Code de la commune", "Code B.Vote"], _MN_2020),
    "21_DEP_T1.csv": _meta("departementales", "2021-06-20", 1, _BV_COLS_STD, _DEP_BINOME),
    "21_DEP_T2.csv": _meta("departementales", "2021-06-27", 2, _BV_COLS_STD, _DEP_BINOME),
    "21_REG_T1.csv": _meta("regionales", "2021-06-20", 1, _BV_COLS_STD, _REG_LISTE),
    "21_REG_T2.csv": _meta("regionales", "2021-06-27", 2, _BV_COLS_STD, _REG_LISTE),
    "22_L_T1.csv": _meta("legislatives", "2022-06-12", 1, _BV_COLS_STD, _NUANCE_NOM),
    "22_L_T2.csv": _meta("legislatives", "2022-06-19", 2, _BV_COLS_STD, _NUANCE_NOM),
    "22_PR_T1.csv": _meta("presidentielles", "2022-04-10", 1, _BV_COLS_STD, _PR_CANDIDAT),
    "22_PR_T2.csv": _meta("presidentielles", "2022-04-24", 2, _BV_COLS_STD, _PR_CANDIDAT),
    "24_EU.csv": _meta(
        "europeennes", "2024-06-09", 1, _BV_COLS_2024,
        {
            "Voix 1": "voix",
            "Nuance liste 1": "code_candidature",
            "Libellé abrégé de liste 1": "nom_candidature",
        },
    ),
    "24_L_T1.csv": _meta(
        "legislatives", "2024-06-30", 1, _BV_COLS_2024,
        {**_REG_LISTE, "Binôme": "nom_candidature"},
    ),
    "24_L_T2.csv": _meta(
        "legislatives", "2024-07-07", 2, _BV_COLS_2024,
        {**_REG_LISTE, "Binôme": "nom_candidature"},
    ),
}
362
+
363
+ DEFAULT_META_CONFIG_PATH = Path(__file__).resolve().parents[2] / "config" / "raw_sources.yaml"
364
+
365
+
366
def _resolve_meta_config(raw: Mapping[str, Mapping[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """Resolve ``copy_from`` inheritance between meta-config entries.

    An entry may point at a base entry via ``copy_from``; the base is resolved
    first and the entry's own keys override it, except ``rename_map`` which is
    merged key-wise (override wins). A ``copy_from`` cycle raises ValueError,
    an unknown target raises KeyError.
    """
    done: Dict[str, Dict[str, Any]] = {}

    def _resolve(name: str, chain: list[str]) -> Dict[str, Any]:
        if name in done:
            return done[name]
        if name in chain:
            raise ValueError(f"Cycle detecte dans meta-config: {' -> '.join(chain + [name])}")
        entry = dict(raw[name])
        parent = entry.pop("copy_from", None)
        if not parent:
            done[name] = entry
            return entry
        if parent not in raw:
            raise KeyError(f"copy_from cible introuvable: {parent}")
        base = _resolve(parent, chain + [name])
        combined = dict(base)
        combined.update(entry)
        # rename_map is merged key-by-key rather than replaced wholesale
        inherited = dict(base.get("rename_map", {}))
        own = dict(entry.get("rename_map", {}))
        if inherited or own:
            combined["rename_map"] = {**inherited, **own}
        done[name] = combined
        return combined

    for name in raw:
        _resolve(name, [])
    return done
394
+
395
+
396
def load_meta_config(meta_path: Path | None) -> Dict[str, Dict[str, Any]]:
    """Load the scrutin meta-config, falling back to the embedded defaults.

    With ``meta_path=None``, the repo-level config file is used when it exists,
    otherwise the built-in DEFAULT_META_CONFIG. ``.yml``/``.yaml`` files need
    PyYAML; any other suffix is parsed as JSON. ``copy_from`` inheritance is
    resolved before returning.
    """
    if meta_path is None:
        if not DEFAULT_META_CONFIG_PATH.exists():
            return DEFAULT_META_CONFIG
        meta_path = DEFAULT_META_CONFIG_PATH
    if not meta_path.exists():
        raise FileNotFoundError(f"Meta-config file not found: {meta_path}")
    if meta_path.suffix in (".yml", ".yaml"):
        try:
            import yaml
        except Exception as exc:
            raise RuntimeError("PyYAML is required to read YAML meta-config files.") from exc
        payload = yaml.safe_load(meta_path.read_text()) or {}
    else:
        payload = json.loads(meta_path.read_text())
    if not isinstance(payload, dict):
        raise ValueError("Meta-config invalide: attendu un mapping de fichiers vers meta-donnees.")
    return _resolve_meta_config(payload)
415
+
416
+
417
def preprocess_all(raw_dir: Path, output_dir: Path, meta_config: Mapping[str, Mapping[str, Any]]) -> pd.DataFrame:
    """Standardise every configured raw CSV and persist the harmonised long format.

    Raw files missing from ``raw_dir`` are skipped with a warning; loading
    nothing at all is an error. Writes ``elections_long.parquet`` and
    ``elections_long.csv`` under ``output_dir`` and returns the concatenated
    dataframe.
    """
    standardized: list[pd.DataFrame] = []
    skipped: list[str] = []
    for file_name, meta in meta_config.items():
        source = raw_dir / file_name
        if not source.exists():
            skipped.append(file_name)
            continue
        LOGGER.info("Standardisation de %s", file_name)
        standardized.append(
            data_prep.standardize_election(
                source,
                meta,
                rename_map=meta.get("rename_map", {}),
                sep=meta.get("sep", ";"),
                encoding=meta.get("encoding", ("cp1252", "utf-8-sig", "latin-1")),
                decimal=meta.get("decimal", ","),
            )  # type: ignore[arg-type]
        )
    if skipped:
        LOGGER.warning("Fichiers manquants ignorés: %s", ", ".join(sorted(skipped)))
    if not standardized:
        raise RuntimeError("Aucune donnée chargée : vérifier le dossier raw et la configuration meta.")

    elections_long = pd.concat(standardized, ignore_index=True)
    elections_long["date_scrutin"] = pd.to_datetime(elections_long["date_scrutin"])
    elections_long["annee"] = elections_long["date_scrutin"].dt.year
    elections_long["type_scrutin"] = elections_long["type_scrutin"].str.lower()
    # the commune code is the prefix of the "commune-bureau" identifier
    elections_long["code_commune"] = elections_long["code_bv"].astype(str).str.split("-").str[0]

    # surface (but do not fail on) consistency issues detected downstream
    for issue_name, rows in data_prep.validate_consistency(elections_long).items():
        if len(rows) > 0:
            LOGGER.warning("%s : %s lignes a inspecter", issue_name, len(rows))

    output_dir.mkdir(parents=True, exist_ok=True)
    parquet_path = output_dir / "elections_long.parquet"
    csv_path = output_dir / "elections_long.csv"
    elections_long.to_parquet(parquet_path, index=False)
    elections_long.to_csv(csv_path, sep=";", index=False)
    LOGGER.info("Long format sauvegarde (%s lignes) -> %s / %s", len(elections_long), parquet_path, csv_path)
    return elections_long
458
+
459
+
460
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse CLI arguments for the preprocessing entry point.

    Args:
        argv: explicit argument list (useful for tests); ``None`` reads
            ``sys.argv[1:]`` as before, so existing callers are unaffected.
    """
    parser = argparse.ArgumentParser(description="Prétraitement des fichiers bruts en format long standardisé.")
    parser.add_argument("--raw-dir", type=Path, default=Path("data/raw"), help="Répertoire des fichiers bruts CSV.")
    parser.add_argument("--output-dir", type=Path, default=Path("data/interim"), help="Destination du format long harmonisé.")
    parser.add_argument(
        "--meta-config",
        type=Path,
        default=None,
        help="Chemin vers un fichier JSON/YAML décrivant les meta-données des scrutins. Par défaut, utilise la configuration embarquée.",
    )
    return parser.parse_args(argv)
471
+
472
+
473
def main() -> None:
    """CLI entry point: configure logging, then run the full preprocessing."""
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    cli_args = parse_args()
    preprocess_all(cli_args.raw_dir, cli_args.output_dir, load_meta_config(cli_args.meta_config))


if __name__ == "__main__":
    main()
src/data_prep.py ADDED
@@ -0,0 +1,418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import re
5
+ from typing import Dict, Iterable, List, Mapping, Optional
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+
10
# Columns kept across all scrutins
# (harmonised long format: one row per bureau x scrutin x candidature)
STANDARD_COLUMNS: List[str] = [
    "code_bv",  # bureau identifier; built as "<commune>-<bureau>" — see build_code_bv
    "nom_bv",
    "annee",
    "date_scrutin",
    "type_scrutin",
    "tour",
    "inscrits",
    "votants",
    "abstentions",
    "blancs",
    "nuls",
    "exprimes",
    "code_candidature",  # nuance / party code
    "nom_candidature",
    "voix",
]

# Count columns that must end up numeric after loading/standardisation.
NUMERIC_COLUMNS = [
    "inscrits",
    "votants",
    "abstentions",
    "blancs",
    "nuls",
    "exprimes",
    "voix",
]
38
+
39
+
40
# Fallback repairs applied by _normalize_label: each key is a common mojibake
# sequence (UTF-8 bytes mis-decoded as cp1252/latin-1), mapped back to the
# intended character.
_MOJIBAKE_REPLACEMENTS = {
    "é": "é",
    "è": "è",
    "ê": "ê",
    "ë": "ë",
    "Ã ": "à",
    "â": "â",
    "ç": "ç",
    "ù": "ù",
    "û": "û",
    "ï": "ï",
    "ô": "ô",
    "ö": "ö",
    "É": "É",
    "È": "È",
    "Ê": "Ê",
    "Ë": "Ë",
    "À": "À",
    "Â": "Â",
    "Ç": "Ç",
    # NOTE(review): the next two keys both render as U+FFFD here and look like
    # duplicates (the second would silently overwrite the first, same value).
    # Verify the original bytes; if truly identical, drop one entry.
    "�": "°",
    "�": "°",
}
63
+
64
+
65
def _normalize_label(label: str) -> str:
    """
    Attempt to repair mojibake in column labels (UTF-8 read as latin-1 or vice versa).

    The primary repair re-encodes the label as latin-1 and decodes it as UTF-8,
    which fixes any single mis-decoding regardless of the character involved.
    The _MOJIBAKE_REPLACEMENTS table remains as a fallback for labels where the
    round-trip is not applicable. BOMs are stripped and whitespace collapsed.

    Fix: the previous implementation then re-encoded the repaired string as
    UTF-8 and decoded it as latin-1, which re-introduced the mojibake it had
    just fixed — so only characters present in the replacement table ever came
    out right. That undo step is removed.
    """
    fixed = label
    try:
        fixed = label.encode("latin1").decode("utf-8")
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Not a latin-1-representable mojibake string; keep as-is and rely on
        # the replacement table below.
        fixed = label
    else:
        # Leftover from double-encoding (e.g. "Â°" for "°"): drop the stray Â.
        if "Â" in fixed:
            fixed = fixed.replace("Â", "")
    for bad, good in _MOJIBAKE_REPLACEMENTS.items():
        if bad in fixed:
            fixed = fixed.replace(bad, good)
    fixed = fixed.replace("\ufeff", "")  # remove BOM
    fixed = " ".join(fixed.split())  # normalise whitespace
    return fixed
88
+
89
+
90
def _canonical_label(label: str) -> str:
    """
    Lowercase alpha-numeric only version of a label for fuzzy matching.

    The label is first run through the mojibake repair so accented/broken
    variants of the same header collapse to a single canonical key.
    """
    # `re` is imported at module level; the previous function-local
    # `import re` was redundant and has been removed.
    normalized = _normalize_label(label).lower()
    return re.sub(r"[^0-9a-z]", "", normalized)
98
+
99
+
100
def _unpivot_wide_candidates(df: pd.DataFrame) -> pd.DataFrame:
    """
    Detect wide candidate columns (e.g., 'Voix 1', 'Nuance liste 2') and unpivot to long.

    Every column whose name ends in a number is treated as candidate-indexed;
    indices that carry both a votes column and a nuance column become one frame
    each (non-candidate columns repeated), concatenated into long format with
    standard columns 'voix' and 'code_candidature'. With at most one complete
    candidate index the input is returned unchanged.

    Fix: removed the unused local `candidate_cols` (computed but never read).
    """
    # trailing integer (optionally separated by whitespace or '_') marks a wide column
    pattern = re.compile(r"^(?P<base>.*?)(?:\s+|_)?(?P<idx>\d+)$")
    candidate_map: Dict[str, Dict[str, str]] = {}
    wide_cols: set[str] = set()
    for col in df.columns:
        match = pattern.match(col)
        if not match:
            continue
        # NOTE: every numbered column is excluded from the repeated base
        # columns, even when its base label is not recognised below.
        wide_cols.add(col)
        base = match.group("base").strip()
        idx = match.group("idx")
        canon = _canonical_label(base)
        field = None
        if canon == "voix":
            field = "voix"
        elif canon in {"nuance", "nuanceliste", "codenuance", "codenuanceducandidat", "codenuanceliste"}:
            field = "code_candidature"
        if field:
            candidate_map.setdefault(idx, {})[field] = col

    # only indices carrying BOTH votes and nuance are usable candidates
    indices = [
        idx for idx, fields in candidate_map.items()
        if {"voix", "code_candidature"}.issubset(fields.keys())
    ]
    if len(indices) <= 1:
        # nothing to unpivot (single-candidate "wide" files keep their layout)
        return df

    base_cols = [c for c in df.columns if c not in wide_cols]
    frames = []
    for idx in sorted(indices, key=lambda v: int(v)):
        fields = candidate_map[idx]
        use_cols = base_cols + list(fields.values())
        sub = df[use_cols].copy()
        sub = sub.rename(
            columns={
                fields["voix"]: "voix",
                fields["code_candidature"]: "code_candidature",
            }
        )
        frames.append(sub)
    return pd.concat(frames, ignore_index=True)
146
+
147
+
148
def deduplicate_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Collapse columns sharing the same name into a single column.

    For each duplicated label, the first occurrence (in column order) is kept
    and filled with the first non-null value found across the duplicates; the
    remaining occurrences are dropped. The input dataframe is not modified.

    Fix: the previous implementation removed duplicates with
    ``df.drop(columns=cols[1:])`` — but a label-based drop removes *every*
    column carrying that label, so the merged column itself was deleted.
    Duplicates are now handled positionally.
    """
    df = df.copy()
    labels = list(df.columns)
    for col in df.columns[df.columns.duplicated()].unique():
        positions = [i for i, c in enumerate(labels) if c == col]
        merged = df.iloc[:, positions[0]]
        for pos in positions[1:]:
            merged = merged.fillna(df.iloc[:, pos])
        df.iloc[:, positions[0]] = merged
    # keep only the first occurrence of each label
    return df.loc[:, ~df.columns.duplicated()]
165
+
166
+
167
def load_raw(
    path: Path,
    *,
    sep: str = ";",
    encoding: str | Iterable[str] = "cp1252",
    decimal: str = ",",
    dtype: Optional[Mapping[str, str]] = None,
    engine: str = "c",
) -> pd.DataFrame:
    """
    Wrapper around read_csv with encoding fallbacks to mitigate mojibake.

    Tries encodings in order (default: cp1252, utf-8-sig, latin-1) until column
    names no longer contain replacement artefacts (� or Ã), then normalises labels.

    ``encoding`` may be a single name or an ordered iterable of candidates;
    "utf-8-sig" and "latin-1" are always appended as last resorts. On a
    ParserError the read is retried with the python engine, skipping bad lines.
    Raises the last UnicodeDecodeError if every candidate fails to decode.
    """
    # Build the ordered candidate list, de-duplicating the appended fallbacks.
    encoding_choices: List[str] = []
    if isinstance(encoding, str):
        encoding_choices.append(encoding)
    else:
        encoding_choices.extend(list(encoding))
    encoding_choices.extend([e for e in ["utf-8-sig", "latin-1"] if e not in encoding_choices])

    last_exc: Optional[Exception] = None
    for enc in encoding_choices:
        try:
            try:
                df = pd.read_csv(
                    path,
                    sep=sep,
                    encoding=enc,
                    decimal=decimal,
                    dtype=dtype,  # type: ignore
                    engine=engine,  # type: ignore
                    low_memory=False,
                )
            except pd.errors.ParserError:
                # Retry with python engine and skip malformed lines (low_memory not supported)
                df = pd.read_csv(
                    path,
                    sep=sep,
                    encoding=enc,
                    decimal=decimal,
                    dtype=dtype,  # type: ignore
                    engine="python",
                    on_bad_lines="skip",
                )
        except UnicodeDecodeError as exc:
            # Remember the failure and move on to the next encoding candidate.
            last_exc = exc
            continue

        # Decoding succeeded but headers may still be mojibake; prefer a later
        # candidate unless this was the last one (then accept and repair below).
        bad_cols = any(("�" in col) or ("Ã" in col) for col in df.columns)
        if bad_cols and enc != encoding_choices[-1]:
            # try next encoding candidate
            continue

        df.columns = [_normalize_label(c) for c in df.columns]
        return df

    if last_exc:
        raise last_exc
    # Defensive: only reachable if the candidate list were empty.
    raise UnicodeDecodeError("utf-8", b"", 0, 1, "unable to decode with provided encodings")
228
+
229
+
230
def ensure_columns(df: pd.DataFrame, required: Iterable[str]) -> pd.DataFrame:
    """Guarantee every *required* column exists, filling absent ones with NaN.

    The frame is modified in place and also returned for call chaining.
    """
    missing = [name for name in required if name not in df.columns]
    for name in missing:
        df[name] = np.nan
    return df
238
+
239
+
240
def add_election_metadata(df: pd.DataFrame, meta: Mapping[str, object]) -> pd.DataFrame:
    """Stamp scrutin-level metadata onto every row of *df*.

    ``meta`` must provide ``type_scrutin``, ``tour`` and ``date_scrutin``;
    ``annee`` is optional and falls back to the year of ``date_scrutin``.
    """
    df["type_scrutin"] = meta["type_scrutin"]
    df["tour"] = int(meta["tour"])  # type: ignore
    df["date_scrutin"] = pd.to_datetime(meta["date_scrutin"])  # type: ignore
    derived_year = df["date_scrutin"].dt.year
    df["annee"] = meta.get("annee", derived_year)  # type: ignore
    return df
257
+
258
+
259
def build_code_bv(df: pd.DataFrame, meta: Mapping[str, object]) -> pd.DataFrame:
    """Make sure the frame carries a ``code_bv`` identifier column.

    An existing ``code_bv`` column is only stripped of surrounding whitespace.
    Otherwise ``meta["code_bv_cols"]`` must list the columns whose values are
    joined with ``-`` (purely numeric parts zero-padded to 3 digits).
    """
    if "code_bv" in df.columns:
        df["code_bv"] = df["code_bv"].astype(str).str.strip()
        return df

    source_cols: Optional[List[str]] = meta.get("code_bv_cols")  # type: ignore[arg-type]
    if not source_cols:
        raise KeyError("code_bv not found in dataframe and no code_bv_cols provided in meta.")

    # Resolve requested names against the frame using the canonical
    # (accent/space-insensitive) form of each label.
    canon_to_actual = {_canonical_label(name): name for name in df.columns}
    resolved: List[str] = []
    for target in source_cols:
        key = _canonical_label(target)
        if key not in canon_to_actual:
            raise KeyError(f"{target!r} not found in columns. Available: {list(df.columns)}")
        resolved.append(canon_to_actual[key])

    def _fmt(value: str) -> str:
        return value.zfill(3) if value.isdigit() else value

    df["code_bv"] = (
        df[resolved]
        .astype(str)
        .apply(lambda row: "-".join(_fmt(v) for v in row), axis=1)
    )
    return df
288
+
289
+
290
def coerce_numeric(df: pd.DataFrame, numeric_cols: Iterable[str] = NUMERIC_COLUMNS) -> pd.DataFrame:
    """Cast every listed column that is present to numeric; bad values become NaN."""
    for name in (c for c in numeric_cols if c in df.columns):
        df[name] = pd.to_numeric(df[name], errors="coerce")
    return df
295
+
296
+
297
def basic_cleaning(df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply harmonisations common to all scrutins.

    - fill missing ``voix`` with 0 (creating the column when absent),
    - recompute ``exprimes`` as votants - blancs - nuls when it is missing
      but all three components are known,
    - drop rows lacking the minimal ``code_bv`` identifier.

    Expects ``exprimes``, ``votants``, ``blancs``, ``nuls`` and ``code_bv``
    to exist (guaranteed upstream by ``ensure_columns``).
    """
    df = df.copy()
    # BUG FIX: ``df.get("voix", 0)`` returns the *scalar* 0 when the column is
    # missing, and ``0.fillna(0)`` then raises AttributeError. Guard explicitly.
    if "voix" in df.columns:
        df["voix"] = df["voix"].fillna(0)
    else:
        df["voix"] = 0

    # Recompute exprimes when possible
    mask_expr = (
        df["exprimes"].isna()
        & df["votants"].notna()
        & df["blancs"].notna()
        & df["nuls"].notna()
    )
    df.loc[mask_expr, "exprimes"] = (
        df.loc[mask_expr, "votants"] - df.loc[mask_expr, "blancs"] - df.loc[mask_expr, "nuls"]
    )

    # Remove rows without minimal identifiers
    df = df[df["code_bv"].notna()]
    return df
318
+
319
+
320
def standardize_election(
    path: Path,
    meta: Mapping[str, object],
    *,
    rename_map: Optional[Mapping[str, str]] = None,
    sep: str = ";",
    encoding: str | Iterable[str] = ("cp1252", "utf-8-sig", "latin-1"),
    decimal: str = ",",
    dtype: Optional[Mapping[str, str]] = None,
) -> pd.DataFrame:
    """
    Load and standardise a single raw table to the long format expected downstream.

    Parameters
    ----------
    path : Path
        CSV path to the raw election table.
    meta : Mapping
        Must contain type_scrutin, tour, date_scrutin. Optionally code_bv_cols and annee.
        May also contain ``tour_column`` (plus optional ``tours``) to split a
        file that mixes several rounds; in that case ``tour`` must be absent.
    rename_map : Mapping
        Columns to rename from the raw schema to the standard schema.
    """
    df_raw = load_raw(path, sep=sep, encoding=encoding, decimal=decimal, dtype=dtype)
    # Normalise the rename keys the same way load_raw normalised the headers.
    rename_norm = {_normalize_label(k): v for k, v in (rename_map or {}).items()}

    def _process(df: pd.DataFrame, meta_for_tour: Mapping[str, object]) -> pd.DataFrame:
        # Standardises one (sub-)table: unpivot candidates, rename, dedupe
        # columns, build code_bv, stamp metadata, coerce types, clean rows.
        df_local = df.copy()
        df_local.columns = [_normalize_label(c) for c in df_local.columns]
        df_local = _unpivot_wide_candidates(df_local)
        if rename_norm:
            # Rename based on a canonical form (no accents/spaces), ignoring
            # possible numeric suffixes on the raw column names.
            import re

            def canonical_base(label: str) -> str:
                base = _canonical_label(label)
                # NOTE(review): r"\\d+$" matches a literal backslash followed
                # by digits; stripping a trailing numeric *suffix* would be
                # r"\d+$" — confirm which is intended.
                return re.sub(r"\\d+$", "", base)

            rename_by_base = {canonical_base(k): v for k, v in rename_norm.items()}
            rename_using = {}
            for col in df_local.columns:
                base = canonical_base(col)
                if base in rename_by_base:
                    rename_using[col] = rename_by_base[base]
            df_local = df_local.rename(columns=rename_using)
        df_local = deduplicate_columns(df_local)
        df_local = df_local.loc[:, ~df_local.columns.duplicated()]

        df_local = build_code_bv(df_local, meta_for_tour)
        df_local = add_election_metadata(df_local, meta_for_tour)
        df_local = ensure_columns(df_local, STANDARD_COLUMNS)
        df_local = coerce_numeric(df_local)
        df_local = basic_cleaning(df_local)
        # Standard columns first, any extra raw columns preserved after them.
        ordered_cols = STANDARD_COLUMNS + [col for col in df_local.columns if col not in STANDARD_COLUMNS]
        return df_local[ordered_cols]

    # Multi-tour handling: split on tour_column if provided and "tour" not explicit
    if meta.get("tour_column") and "tour" not in meta:
        tour_col = _normalize_label(str(meta["tour_column"]))
        if tour_col not in df_raw.columns:
            # Fallback: treat the file as a single round (tour=1) when the
            # declared tour column cannot be found.
            meta_single = {k: v for k, v in meta.items() if k != "tour_column"}
            meta_single["tour"] = int(meta.get("tour", 1))
            return _process(df_raw, meta_single)
        tours = meta.get("tours") or sorted(df_raw[tour_col].dropna().unique())
        frames: list[pd.DataFrame] = []
        for tour_val in tours:
            meta_tour = {k: v for k, v in meta.items() if k != "tour_column"}
            meta_tour["tour"] = int(tour_val)
            frames.append(_process(df_raw[df_raw[tour_col] == tour_val], meta_tour))
        if not frames:
            raise RuntimeError(f"Aucun tour détecté pour {path.name}")
        return pd.concat(frames, ignore_index=True)

    return _process(df_raw, meta)
394
+
395
+
396
def validate_consistency(df: pd.DataFrame, *, tolerance: float = 0.02) -> Dict[str, pd.DataFrame]:
    """
    Quick validation checks; returns offending rows keyed by check name.

    Checks (each only when its columns are present): votants > inscrits,
    exprimes + blancs + nuls vs votants, and sum of voix vs exprimes per
    (code_bv, type_scrutin, tour), both relative to *tolerance*.
    """
    issues: Dict[str, pd.DataFrame] = {}
    available = set(df.columns)

    if {"votants", "inscrits"} <= available:
        issues["votants_gt_inscrits"] = df[df["votants"] > df["inscrits"]]

    if {"exprimes", "blancs", "nuls", "votants"} <= available:
        balance = df.copy()
        balance["gap"] = (
            (balance["exprimes"] + balance["blancs"] + balance["nuls"] - balance["votants"])
            / balance["votants"].replace(0, np.nan)
        )
        issues["exprimes_balance_off"] = balance[balance["gap"].abs() > tolerance]

    if {"code_bv", "type_scrutin", "tour", "exprimes", "voix"} <= available:
        sums = df.groupby(["code_bv", "type_scrutin", "tour"], as_index=False)[["exprimes", "voix"]].sum()
        sums["gap"] = (sums["voix"] - sums["exprimes"]) / sums["exprimes"].replace(0, np.nan)
        issues["sum_voix_vs_exprimes"] = sums[sums["gap"].abs() > tolerance]

    return issues
src/database.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Iterable, Optional
6
+
7
+ import pandas as pd
8
+ import sqlalchemy as sa
9
+ from sqlalchemy import Column, Date, Float, Integer, MetaData, String, Table
10
+ from sqlalchemy.engine import Engine
11
+
12
+ from .constants import NUMERIC_COLUMNS
13
+ from .pipeline import normalize_bloc
14
+
15
+
16
def get_engine(url: Optional[str] = None) -> Engine:
    """Build a SQLAlchemy engine from *url*, falling back to the DATABASE_URL env var."""
    resolved = url or os.getenv("DATABASE_URL")
    if not resolved:
        raise RuntimeError("DATABASE_URL is not set. Example: postgresql+psycopg2://user:pass@localhost:5432/elections")
    return sa.create_engine(resolved)
21
+
22
+
23
def define_schema(metadata: MetaData) -> Table:
    """Declare the flat ``election_results`` table on *metadata* and return it.

    One row = one (bureau de vote, scrutin, bloc) combination; raw counts,
    derived shares/rates and lagged features are stored denormalised.
    """
    return Table(
        "election_results",
        metadata,
        Column("id", Integer, primary_key=True, autoincrement=True),
        # Bureau de vote identification
        Column("code_bv", String(32), index=True, nullable=False),
        Column("nom_bv", String(255)),
        # Scrutin identification
        Column("date_scrutin", Date, index=True, nullable=False),
        Column("annee", Integer, index=True, nullable=False),
        Column("type_scrutin", String(32), index=True, nullable=False),
        Column("tour", Integer, nullable=False),
        # Political bloc and raw counts
        Column("bloc", String(64), index=True, nullable=False),
        Column("voix_bloc", Float),
        Column("exprimes", Float),
        Column("inscrits", Float),
        Column("votants", Float),
        Column("blancs", Float),
        Column("nuls", Float),
        # Derived shares and participation rates
        Column("part_bloc", Float),
        Column("part_bloc_national", Float),
        Column("taux_participation_national", Float),
        Column("taux_participation_bv", Float),
        Column("taux_blancs_bv", Float),
        Column("taux_nuls_bv", Float),
        # Gaps vs the national reference, and registration growth
        Column("ecart_bloc_vs_national", Float),
        Column("ecart_participation_vs_nat", Float),
        Column("croissance_inscrits_depuis_base", Float),
        # Lagged features (previous comparable scrutin)
        Column("part_bloc_lag1", Float),
        Column("ecart_bloc_vs_national_lag1", Float),
        Column("taux_participation_bv_lag1", Float),
        Column("annee_centre", Float),
    )
55
+
56
+
57
def create_schema(engine: Engine) -> None:
    """Create the election_results table (and its indexes) if not yet present."""
    registry = MetaData()
    define_schema(registry)
    registry.create_all(engine)
61
+
62
+
63
+ def _coerce_numeric(df: pd.DataFrame, numeric_cols: Iterable[str]) -> pd.DataFrame:
64
+ for col in numeric_cols:
65
+ if col in df.columns:
66
+ df[col] = pd.to_numeric(df[col], errors="coerce")
67
+ return df
68
+
69
+
70
def load_processed_to_db(
    processed_path: Path = Path("data/processed/elections_blocs.csv"),
    *,
    engine: Optional[Engine] = None,
    if_exists: str = "replace",
    chunksize: int = 1000,
) -> int:
    """
    Load the processed bloc-level dataset into PostgreSQL.

    Parameters
    ----------
    processed_path : Path
        ``;``-separated CSV produced by the processing pipeline.
    engine : Engine, optional
        SQLAlchemy engine; defaults to one built from DATABASE_URL.
    if_exists : str
        Forwarded to ``DataFrame.to_sql`` ("replace" by default).
    chunksize : int
        Rows per multi-row INSERT batch.

    Returns the number of rows written.
    """
    engine = engine or get_engine()
    create_schema(engine)

    df = pd.read_csv(processed_path, sep=";")
    # Store plain dates (not timestamps) to match the Date column.
    df["date_scrutin"] = pd.to_datetime(df["date_scrutin"]).dt.date
    if "bloc" in df.columns:
        df["bloc"] = df["bloc"].apply(normalize_bloc)
    df = _coerce_numeric(df, NUMERIC_COLUMNS)

    # NOTE(review): with if_exists="replace", to_sql drops and recreates the
    # table from inferred dtypes, discarding the schema created just above
    # (indexes, NOT NULL constraints, id column) — confirm this is intended.
    df.to_sql(
        "election_results",
        engine,
        if_exists=if_exists,
        index=False,
        method="multi",
        chunksize=chunksize,
    )
    return len(df)
100
+
101
+
102
def list_bureaux(engine: Engine) -> list[str]:
    """Return every distinct bureau code present in election_results, sorted."""
    query = sa.text("select distinct code_bv from election_results order by code_bv")
    with engine.connect() as conn:
        rows = conn.execute(query).fetchall()
    return [row[0] for row in rows]
106
+
107
+
108
def fetch_history(engine: Engine, code_bv: str) -> pd.DataFrame:
    """Fetch the full result history of one bureau, ordered by date then bloc."""
    sql = sa.text(
        """
        select *
        from election_results
        where code_bv = :code_bv
        order by date_scrutin asc, bloc asc
        """
    )
    return pd.read_sql(sql, engine, params={"code_bv": code_bv})
118
+
119
+
120
# Public API of this module.
__all__ = [
    "create_schema",
    "define_schema",
    "fetch_history",
    "get_engine",
    "list_bureaux",
    "load_processed_to_db",
]


if __name__ == "__main__":
    # CLI entry point: always creates the schema; --load additionally ingests
    # the processed CSV (replacing the existing table).
    import argparse

    parser = argparse.ArgumentParser(description="Initialise la base et charge les résultats.")
    parser.add_argument(
        "--load",
        action="store_true",
        help="Charger data/processed/elections_blocs.csv dans la base (remplace la table).",
    )
    parser.add_argument(
        "--path",
        type=Path,
        default=Path("data/processed/elections_blocs.csv"),
        help="Chemin vers le fichier processe (CSV ; par defaut data/processed/elections_blocs.csv).",
    )
    args = parser.parse_args()

    engine = get_engine()
    create_schema(engine)
    if args.load:
        rows = load_processed_to_db(args.path, engine=engine)
        print(f"{rows} lignes inserees dans election_results.")
    else:
        print("Schema cree. Utilisez --load pour charger les donnees.")
src/db/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Database schema and ingestion utilities.
3
+ """
src/db/ingest.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import logging
5
+ from pathlib import Path
6
+ from typing import Dict, Iterable, Tuple
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ import sqlalchemy as sa
11
+ from sqlalchemy.dialects.postgresql import insert
12
+
13
+ from src.constants import CANDIDATE_CATEGORIES
14
+ from src.data import preprocess as preprocess_module
15
+ from src.db.schema import (
16
+ bureaux,
17
+ categories,
18
+ communes,
19
+ create_schema,
20
+ elections,
21
+ get_engine,
22
+ results_local,
23
+ results_national,
24
+ )
25
+ from src.features import build_features
26
+
27
+ LOGGER = logging.getLogger(__name__)
28
+ TARGET_COLS = [f"target_share_{c}" for c in CANDIDATE_CATEGORIES]
29
+ ID_COLS = ["commune_code", "code_bv", "election_type", "election_year", "round", "date_scrutin"]
30
+
31
+
32
def load_panel(input_path: Path) -> pd.DataFrame:
    """Read the harmonised panel, choosing parquet or ;-separated CSV by suffix."""
    if not input_path.exists():
        raise FileNotFoundError(f"Dataset panel introuvable : {input_path}")
    if input_path.suffix != ".parquet":
        return pd.read_csv(input_path, sep=";")
    return pd.read_parquet(input_path)
38
+
39
+
40
def ensure_panel_exists(panel_path: Path, elections_long_path: Path, mapping_path: Path) -> pd.DataFrame:
    """
    Return the panel, rebuilding it from upstream artefacts when missing.

    When *panel_path* is absent, the long-format dataset is itself regenerated
    if needed (preprocess of ``data/raw``), then the panel is rebuilt with
    ``build_features.build_panel`` before being loaded.
    """
    if panel_path.exists():
        return load_panel(panel_path)
    LOGGER.info("Panel manquant, tentative de reconstruction via preprocess + build_features.")
    if not elections_long_path.exists():
        preprocess_module.preprocess_all(Path("data/raw"), elections_long_path.parent, preprocess_module.DEFAULT_META_CONFIG)
    build_features.build_panel(elections_long_path, mapping_path, panel_path, csv_output=None)
    return load_panel(panel_path)
48
+
49
+
50
def check_mass(panel: pd.DataFrame, tolerance: float = 0.05) -> None:
    """Log a warning when a row's target shares do not sum to ~1 (within *tolerance*)."""
    totals = panel[TARGET_COLS].sum(axis=1)
    out_of_range = panel[(totals < (1 - tolerance)) | (totals > (1 + tolerance))]
    if not out_of_range.empty:
        LOGGER.warning("Somme des parts hors intervalle attendu pour %s lignes (tol=%s).", len(out_of_range), tolerance)
55
+
56
+
57
def melt_panel(panel: pd.DataFrame) -> pd.DataFrame:
    """Reshape the wide panel into long (ids + turnout, category, share) format."""
    melted = panel.melt(
        id_vars=ID_COLS + ["turnout_pct"],
        value_vars=TARGET_COLS,
        var_name="category",
        value_name="share",
    )
    melted["category"] = melted["category"].str.replace("target_share_", "", regex=False)
    return melted
61
+
62
+
63
def _upsert_simple(conn, table, rows: Iterable[dict], index_elements: Iterable[str]) -> None:
    """INSERT ... ON CONFLICT DO NOTHING a batch of referential rows.

    Materialises *rows* before anything else. The original built the statement
    with ``insert(table).values(list(rows))`` *before* checking emptiness, so
    an empty iterable still constructed a values() clause with no rows, and a
    generator argument left ``if rows:`` always truthy (a generator object is
    truthy even when exhausted).
    """
    materialized = list(rows)
    if not materialized:
        return
    stmt = insert(table).values(materialized)
    stmt = stmt.on_conflict_do_nothing(index_elements=list(index_elements))
    conn.execute(stmt)
68
+
69
+
70
def ingest(panel: pd.DataFrame, engine) -> None:
    """
    Load the harmonised panel into the normalized PostgreSQL schema.

    Runs inside a single transaction: ensures the schema exists, upserts the
    referential tables (categories, communes, bureaux, elections), then
    upserts bureau-level results and nationally aggregated references.
    Shares are stored as percentages (share * 100).
    """
    check_mass(panel)
    panel = panel.copy()
    # Missing round defaults to 1; dates stored as plain dates.
    panel["round"] = panel["round"].fillna(1).astype(int)
    panel["date_scrutin"] = pd.to_datetime(panel["date_scrutin"]).dt.date

    long_df = melt_panel(panel)
    # Keep only known categories; convert fractional shares to percentages.
    long_df = long_df[long_df["category"].isin(CANDIDATE_CATEGORIES)]
    long_df["share_pct"] = (long_df["share"].astype(float) * 100).round(6)

    with engine.begin() as conn:
        create_schema(conn)
        LOGGER.info("Schéma vérifié.")

        # --- referential tables -------------------------------------------
        _upsert_simple(conn, categories, [{"name": cat} for cat in CANDIDATE_CATEGORIES], ["name"])
        cat_map = dict(conn.execute(sa.select(categories.c.name, categories.c.id)))

        # Commune codes double as both the normalized name and the INSEE code.
        commune_rows = [
            {"name_normalized": code, "insee_code": code}
            for code in sorted(long_df["commune_code"].dropna().unique())
        ]
        _upsert_simple(conn, communes, commune_rows, ["insee_code"])
        commune_map = dict(conn.execute(sa.select(communes.c.insee_code, communes.c.id)))

        def bureau_code_only(code_bv: str) -> str:
            # "commune-bureau" composite ids keep only the bureau part.
            if "-" in str(code_bv):
                parts = str(code_bv).split("-", 1)
                return parts[1]
            return str(code_bv)

        bureau_rows = []
        for _, row in long_df.drop_duplicates(subset=["commune_code", "code_bv"]).iterrows():
            commune_id = commune_map.get(row["commune_code"])
            if commune_id is None:
                continue
            bureau_rows.append(
                {
                    "commune_id": commune_id,
                    "bureau_code": bureau_code_only(row["code_bv"]),
                    "bureau_label": None,
                }
            )
        _upsert_simple(conn, bureaux, bureau_rows, ["commune_id", "bureau_code"])
        bureau_map = {
            (commune_id, bureau_code): bureau_id
            for bureau_id, commune_id, bureau_code in conn.execute(
                sa.select(bureaux.c.id, bureaux.c.commune_id, bureaux.c.bureau_code)
            )
        }

        election_rows = []
        for _, row in panel.drop_duplicates(subset=["election_type", "election_year", "round"]).iterrows():
            election_rows.append(
                {
                    "election_type": row["election_type"],
                    "election_year": int(row["election_year"]),
                    "round": int(row["round"]) if not pd.isna(row["round"]) else None,
                    "date": row["date_scrutin"],
                }
            )
        _upsert_simple(conn, elections, election_rows, ["election_type", "election_year", "round"])
        # NULL rounds are keyed as round 1 for lookup purposes.
        election_map: Dict[Tuple[str, int, int], int] = {
            (etype, year, int(round_) if round_ is not None else 1): eid
            for eid, etype, year, round_ in conn.execute(
                sa.select(elections.c.id, elections.c.election_type, elections.c.election_year, elections.c.round)
            )
        }

        # --- bureau-level results -----------------------------------------
        local_rows = []
        for row in long_df.itertuples(index=False):
            commune_id = commune_map.get(row.commune_code)
            if commune_id is None:
                continue
            bureau_id = bureau_map.get((commune_id, bureau_code_only(row.code_bv)))
            election_id = election_map.get((row.election_type, int(row.election_year), int(row.round)))
            category_id = cat_map.get(row.category)
            # Skip rows whose referential lookups failed.
            if None in (bureau_id, election_id, category_id):
                continue
            turnout_pct = None if pd.isna(row.turnout_pct) else float(row.turnout_pct) * 100
            local_rows.append(
                {
                    "bureau_id": bureau_id,
                    "election_id": election_id,
                    "category_id": category_id,
                    "share_pct": None if pd.isna(row.share_pct) else float(row.share_pct),
                    "votes": None,
                    "expressed": None,
                    "turnout_pct": turnout_pct,
                }
            )
        if local_rows:
            stmt = insert(results_local).values(local_rows)
            stmt = stmt.on_conflict_do_update(
                index_elements=["bureau_id", "election_id", "category_id"],
                set_={
                    "share_pct": stmt.excluded.share_pct,
                    "votes": stmt.excluded.votes,
                    "expressed": stmt.excluded.expressed,
                    "turnout_pct": stmt.excluded.turnout_pct,
                },
            )
            conn.execute(stmt)
            LOGGER.info("Résultats locaux insérés/mis à jour : %s lignes", len(local_rows))

        # --- national aggregates ------------------------------------------
        # NOTE(review): the national share is the unweighted mean of bureau
        # shares, not a vote-weighted aggregate — confirm this is intended.
        nat_rows = []
        nat = (
            long_df.groupby(["election_type", "election_year", "round", "category"], as_index=False)
            .agg(share=("share_pct", "mean"))
            .rename(columns={"share": "share_pct"})
        )
        # Average turnout per scrutin
        turnout_nat = panel.groupby(["election_type", "election_year", "round"], as_index=False)["turnout_pct"].mean()
        nat = nat.merge(turnout_nat, on=["election_type", "election_year", "round"], how="left")

        for row in nat.itertuples(index=False):
            election_id = election_map.get((row.election_type, int(row.election_year), int(row.round)))
            category_id = cat_map.get(row.category)
            if None in (election_id, category_id):
                continue
            nat_rows.append(
                {
                    "election_id": election_id,
                    "category_id": category_id,
                    "share_pct": None if pd.isna(row.share_pct) else float(row.share_pct),
                    "votes": None,
                    "expressed": None,
                    "turnout_pct": None if pd.isna(row.turnout_pct) else float(row.turnout_pct * 100),
                }
            )
        if nat_rows:
            stmt = insert(results_national).values(nat_rows)
            stmt = stmt.on_conflict_do_update(
                index_elements=["election_id", "category_id"],
                set_={
                    "share_pct": stmt.excluded.share_pct,
                    "votes": stmt.excluded.votes,
                    "expressed": stmt.excluded.expressed,
                    "turnout_pct": stmt.excluded.turnout_pct,
                },
            )
            conn.execute(stmt)
            LOGGER.info("Référentiels nationaux insérés/mis à jour : %s lignes", len(nat_rows))
212
+
213
+
214
def parse_args() -> argparse.Namespace:
    """Parse CLI options: the panel path, plus fallback inputs used to rebuild it."""
    parser = argparse.ArgumentParser(description="Ingestion du panel harmonisé dans PostgreSQL.")
    parser.add_argument("--input", type=Path, default=Path("data/processed/panel.parquet"), help="Chemin vers le panel parquet.")
    parser.add_argument(
        "--elections-long",
        type=Path,
        default=Path("data/interim/elections_long.parquet"),
        help="Format long (fallback pour reconstruire le panel).",
    )
    parser.add_argument(
        "--mapping",
        type=Path,
        default=Path("data/mapping_candidats_blocs.csv"),
        help="Mapping nuance -> catégorie (fallback).",
    )
    return parser.parse_args()
230
+
231
+
232
def main() -> None:
    """Entry point: load (or rebuild) the panel, then ingest it into PostgreSQL."""
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    args = parse_args()
    panel = ensure_panel_exists(args.input, args.elections_long, args.mapping)
    engine = get_engine()
    ingest(panel, engine)


if __name__ == "__main__":
    main()
src/db/schema.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Optional
5
+
6
+ import sqlalchemy as sa
7
+ from sqlalchemy import Column, Date, Float, ForeignKey, Integer, MetaData, String, Table, UniqueConstraint
8
+ from sqlalchemy.engine import Engine
9
+
10
# Shared MetaData registry: every table below attaches to it, so that
# create_schema() can create the whole schema with one create_all() call.
metadata = MetaData()

# Commune referential, keyed by INSEE code.
communes = Table(
    "communes",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("name_normalized", String(255), nullable=True),
    Column("insee_code", String(12), nullable=False, unique=True, index=True),
)

# Polling stations ("bureaux de vote"), unique per (commune, bureau_code).
bureaux = Table(
    "bureaux",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("commune_id", Integer, ForeignKey("communes.id"), nullable=False),
    Column("bureau_code", String(32), nullable=False),
    Column("bureau_label", String(255), nullable=True),
    UniqueConstraint("commune_id", "bureau_code", name="uq_bureau_commune_code"),
)

# One row per scrutin, identified by (type, year, round).
elections = Table(
    "elections",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("election_type", String(32), nullable=False),
    Column("election_year", Integer, nullable=False),
    Column("round", Integer, nullable=True),
    Column("date", Date, nullable=True),
    UniqueConstraint("election_type", "election_year", "round", name="uq_election_unique"),
)

# Political category referential (populated from CANDIDATE_CATEGORIES at ingest time).
categories = Table(
    "categories",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("name", String(64), nullable=False, unique=True),
)

# Bureau-level results: one row per (bureau, election, category).
results_local = Table(
    "results_local",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("bureau_id", Integer, ForeignKey("bureaux.id"), nullable=False),
    Column("election_id", Integer, ForeignKey("elections.id"), nullable=False),
    Column("category_id", Integer, ForeignKey("categories.id"), nullable=False),
    Column("share_pct", Float, nullable=True),
    Column("votes", Float, nullable=True),
    Column("expressed", Float, nullable=True),
    Column("turnout_pct", Float, nullable=True),
    UniqueConstraint("bureau_id", "election_id", "category_id", name="uq_local_bureau_election_category"),
)

# National references: one row per (election, category).
results_national = Table(
    "results_national",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("election_id", Integer, ForeignKey("elections.id"), nullable=False),
    Column("category_id", Integer, ForeignKey("categories.id"), nullable=False),
    Column("share_pct", Float, nullable=True),
    Column("votes", Float, nullable=True),
    Column("expressed", Float, nullable=True),
    Column("turnout_pct", Float, nullable=True),
    UniqueConstraint("election_id", "category_id", name="uq_nat_election_category"),
)
74
+
75
+
76
+ def _build_url_from_env() -> Optional[str]:
77
+ user = os.getenv("DB_USER") or os.getenv("POSTGRES_USER")
78
+ password = os.getenv("DB_PASSWORD") or os.getenv("POSTGRES_PASSWORD")
79
+ host = os.getenv("DB_HOST", "localhost")
80
+ port = os.getenv("DB_PORT", os.getenv("POSTGRES_PORT", "5432"))
81
+ db_name = os.getenv("DB_NAME") or os.getenv("POSTGRES_DB")
82
+ if user and password and db_name:
83
+ return f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{db_name}"
84
+ return None
85
+
86
+
87
def get_engine(url: Optional[str] = None) -> Engine:
    """Resolve a connection URL (argument > DATABASE_URL > DB_*/POSTGRES_* parts) and build an engine."""
    resolved = url or os.getenv("DATABASE_URL") or _build_url_from_env()
    if not resolved:
        raise RuntimeError("DATABASE_URL or DB_* env vars must be set.")
    return sa.create_engine(resolved)
92
+
93
+
94
def create_schema(engine: Engine) -> None:
    """Create all tables declared on the module-level ``metadata`` if absent."""
    metadata.create_all(engine)
src/features/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Feature engineering subpackage.
3
+ """
src/features/build_features.py ADDED
@@ -0,0 +1,570 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import logging
5
+ import re
6
+ import unicodedata
7
+ from functools import reduce
8
+ from pathlib import Path
9
+ from typing import Dict, Iterable, List
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ from src.constants import CANDIDATE_CATEGORIES
15
+
16
+ LOGGER = logging.getLogger(__name__)
17
+
18
+ INDEX_COLS = [
19
+ "commune_code",
20
+ "code_bv",
21
+ "election_type",
22
+ "election_year",
23
+ "round",
24
+ "date_scrutin",
25
+ ]
26
+
27
+ PRESIDENTIAL_NAME_TO_CATEGORY = {
28
+ "arthaud": "extreme_gauche",
29
+ "poutou": "extreme_gauche",
30
+ "melenchon": "gauche_dure",
31
+ "roussel": "gauche_dure",
32
+ "hidalgo": "gauche_modere",
33
+ "jadot": "gauche_modere",
34
+ "hamon": "gauche_modere",
35
+ "macron": "centre",
36
+ "lassalle": "centre",
37
+ "cheminade": "centre",
38
+ "pecresse": "droite_modere",
39
+ "fillon": "droite_modere",
40
+ "dupontaignan": "droite_dure",
41
+ "asselineau": "droite_dure",
42
+ "lepen": "extreme_droite",
43
+ "zemmour": "extreme_droite",
44
+ }
45
+
46
+ EUROPEAN_LIST_KEYWORDS: list[tuple[str, str]] = [
47
+ ("rassemblementnational", "extreme_droite"),
48
+ ("lepen", "extreme_droite"),
49
+ ("republiqueenmarche", "centre"),
50
+ ("renaissance", "centre"),
51
+ ("modem", "centre"),
52
+ ("franceinsoumise", "gauche_dure"),
53
+ ("lutteouvriere", "extreme_gauche"),
54
+ ("revolutionnairecommunistes", "extreme_gauche"),
55
+ ("communiste", "gauche_dure"),
56
+ ("deboutlafrance", "droite_dure"),
57
+ ("dupontaignan", "droite_dure"),
58
+ ("frexit", "droite_dure"),
59
+ ("patriotes", "droite_dure"),
60
+ ("uniondeladroite", "droite_modere"),
61
+ ("droiteetducentre", "droite_modere"),
62
+ ("printempseuropeen", "gauche_modere"),
63
+ ("generation", "gauche_modere"),
64
+ ("animaliste", "gauche_modere"),
65
+ ("ecolog", "gauche_modere"),
66
+ ("federaliste", "centre"),
67
+ ("pirate", "centre"),
68
+ ("citoyenseuropeens", "centre"),
69
+ ("leseuropeens", "centre"),
70
+ ("lesoubliesdeleurope", "centre"),
71
+ ("initiativecitoyenne", "centre"),
72
+ ("esperanto", "centre"),
73
+ ("europeauservicedespeuples", "droite_dure"),
74
+ ("franceroyale", "extreme_droite"),
75
+ ("pourleuropedesgens", "gauche_dure"),
76
+ ("allonsenfants", "droite_modere"),
77
+ ("alliancejaune", "centre"),
78
+ ("giletsjaunes", "centre"),
79
+ ]
80
+
81
+
82
+ def normalize_category(label: str | None) -> str | None:
83
+ if label is None:
84
+ return None
85
+ norm = str(label).strip().lower().replace(" ", "_").replace("-", "_")
86
+ synonyms = {
87
+ "doite_dure": "droite_dure",
88
+ "droite_moderee": "droite_modere",
89
+ "gauche_moderee": "gauche_modere",
90
+ "extreme_gauche": "extreme_gauche",
91
+ "extreme_droite": "extreme_droite",
92
+ "divers": None,
93
+ "gauche": "gauche_modere",
94
+ "droite": "droite_modere",
95
+ }
96
+ mapped = synonyms.get(norm, norm)
97
+ if mapped in CANDIDATE_CATEGORIES:
98
+ return mapped
99
+ return None
100
+
101
+
102
+ def _normalize_code_series(series: pd.Series) -> pd.Series:
103
+ return (
104
+ series.astype("string")
105
+ .str.strip()
106
+ .str.upper()
107
+ .replace({"NAN": pd.NA, "NONE": pd.NA, "": pd.NA, "<NA>": pd.NA})
108
+ )
109
+
110
+
111
+ def _normalize_person_name(value: str | None) -> str:
112
+ if value is None:
113
+ return ""
114
+ text = str(value).strip().lower()
115
+ if not text:
116
+ return ""
117
+ text = unicodedata.normalize("NFD", text)
118
+ text = "".join(ch for ch in text if unicodedata.category(ch) != "Mn")
119
+ return re.sub(r"[^a-z]", "", text)
120
+
121
+
122
def _category_from_name(name: str | None) -> str | None:
    """Map a presidential candidate name to its bloc via substring lookup."""
    normalized = _normalize_person_name(name)
    if not normalized:
        return None
    # First matching key wins (dict preserves insertion order).
    return next(
        (category for key, category in PRESIDENTIAL_NAME_TO_CATEGORY.items() if key in normalized),
        None,
    )
130
+
131
+
132
def _category_from_list_name(name: str | None) -> str | None:
    """Map a European-election list label to its bloc via keyword lookup."""
    normalized = _normalize_person_name(name)
    if not normalized:
        return None
    # Keywords are ordered: the first substring hit decides the category.
    return next(
        (category for key, category in EUROPEAN_LIST_KEYWORDS if key in normalized),
        None,
    )
140
+
141
+
142
def load_elections_long(path: Path, commune_code: str | None = None) -> pd.DataFrame:
    """Load the harmonised long-format election file and normalise its columns.

    Args:
        path: Parquet or ";"-separated CSV produced by the harmonisation step.
        commune_code: Optional INSEE commune code; when given, rows are
            filtered to that commune before unpivoting.

    Returns:
        A DataFrame with parsed dates, numeric vote counts, normalised codes,
        and the ``election_year``/``round``/``election_type`` aliases the
        downstream feature code expects.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    if not path.exists():
        raise FileNotFoundError(f"Fichier long introuvable : {path}")
    if path.suffix == ".parquet":
        df = pd.read_parquet(path)
    else:
        df = pd.read_csv(path, sep=";")
    df["date_scrutin"] = pd.to_datetime(df["date_scrutin"])
    # Fall back to the scrutin date's year when "annee" is missing/non-numeric.
    df["annee"] = pd.to_numeric(df["annee"], errors="coerce").fillna(df["date_scrutin"].dt.year)
    df["election_year"] = df["annee"]
    df["tour"] = pd.to_numeric(df["tour"], errors="coerce")
    df["round"] = df["tour"]
    for col in ["exprimes", "votants", "inscrits", "voix", "blancs", "nuls"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
    if "code_candidature" in df.columns:
        df["code_candidature"] = _normalize_code_series(df["code_candidature"])
    if "code_commune" in df.columns:
        # Strip a trailing ".0" left over from float-typed commune codes.
        df["code_commune"] = (
            df["code_commune"]
            .astype(str)
            .str.strip()
            .str.replace(r"\.0$", "", regex=True)
        )
    else:
        # Derive the commune code from the "commune-bureau" composite key.
        df["code_commune"] = df["code_bv"].astype(str).str.split("-").str[0]
    if commune_code is not None:
        df = df[df["code_commune"].astype(str) == str(commune_code)].copy()
    df = _unpivot_wide_candidates(df)
    # Re-normalise: unpivoting may have introduced new candidature codes.
    if "code_candidature" in df.columns:
        df["code_candidature"] = _normalize_code_series(df["code_candidature"])
    df["type_scrutin"] = df["type_scrutin"].str.lower()
    df["election_type"] = df["type_scrutin"]
    return df
176
+
177
+
178
def _unpivot_wide_candidates(df: pd.DataFrame) -> pd.DataFrame:
    """Melt "wide" result files (one column set per candidate) into long rows.

    Rows that carry no suffixed "Voix N" values are kept as-is after
    backfilling from the unsuffixed "Voix"/"Code Nuance"/"Nom" columns.
    Wide rows are exploded into one row per candidate index N, keeping only
    candidates with a strictly positive vote count.
    """
    df = df.copy()
    # Detect the wide layout: columns named "Voix 1", "Voix 2", ...
    voix_cols = [c for c in df.columns if re.match(r"^Voix \d+$", str(c))]
    if not voix_cols:
        return df
    wide_mask = df[voix_cols].notna().any(axis=1)

    def _fill_unsuffixed_rows(local: pd.DataFrame) -> pd.DataFrame:
        # Some datasets only expose unsuffixed columns (Voix, Code Nuance).
        if "voix" in local.columns and "Voix" in local.columns:
            missing_voix = local["voix"].isna() | (local["voix"] == 0)
            local.loc[missing_voix, "voix"] = pd.to_numeric(
                local.loc[missing_voix, "Voix"],
                errors="coerce",
            )
        if "code_candidature" in local.columns:
            if "Code Nuance" in local.columns:
                local["code_candidature"] = local["code_candidature"].fillna(local["Code Nuance"])
            if "Nuance" in local.columns:
                local["code_candidature"] = local["code_candidature"].fillna(local["Nuance"])
        if "nom_candidature" in local.columns:
            # Prefer "Prénom Nom" when both parts exist; fall back to "Nom".
            if "Nom" in local.columns and "Prénom" in local.columns:
                prenom = local["Prénom"].fillna("").astype(str).str.strip()
                nom = local["Nom"].fillna("").astype(str).str.strip()
                combined = (prenom + " " + nom).str.strip().replace("", pd.NA)
                local["nom_candidature"] = local["nom_candidature"].fillna(combined)
            elif "Nom" in local.columns:
                local["nom_candidature"] = local["nom_candidature"].fillna(local["Nom"])
        return local

    if not wide_mask.any():
        return _fill_unsuffixed_rows(df)

    def _indexed_cols(pattern: str) -> Dict[int, str]:
        # Map candidate index N -> column name for columns matching `pattern`.
        mapping: Dict[int, str] = {}
        for col in df.columns:
            match = re.match(pattern, str(col))
            if match:
                mapping[int(match.group(1))] = col
        return mapping

    voice_map = _indexed_cols(r"^Voix (\d+)$")
    code_map = _indexed_cols(r"^Code Nuance (\d+)$")
    nuance_map = _indexed_cols(r"^Nuance (\d+)$")
    # "Nuance N" is only a fallback when "Code Nuance N" is absent.
    for idx, col in nuance_map.items():
        code_map.setdefault(idx, col)
    # Treat the unsuffixed columns as candidate #1 when present.
    if "voix" in df.columns:
        voice_map.setdefault(1, "voix")
    if "code_candidature" in df.columns:
        code_map.setdefault(1, "code_candidature")

    # A single candidate slot means the data is effectively already long.
    if not any(idx > 1 for idx in voice_map):
        return df

    # Base columns = everything that is not candidate-specific.
    drop_cols = {c for c in df.columns if re.search(r"\s\d+$", str(c))}
    drop_cols.update({"voix", "code_candidature", "nom_candidature"})
    base_cols = [c for c in df.columns if c not in drop_cols]

    df_long = _fill_unsuffixed_rows(df[~wide_mask].copy())
    df_wide = df[wide_mask].copy()
    frames = []

    def _compose_nom(idx: int) -> pd.Series | None:
        # Best available label for candidate `idx`, in decreasing preference:
        # extended list label, abbreviated label, "Prénom Nom", single parts.
        series = pd.Series(pd.NA, index=df_wide.index, dtype="string")
        etendu_col = f"Libellé Etendu Liste {idx}"
        abrege_col = f"Libellé Abrégé Liste {idx}"
        nom_col = f"Nom {idx}"
        prenom_col = f"Prénom {idx}"

        if etendu_col in df_wide.columns:
            series = series.fillna(df_wide[etendu_col].astype("string"))
        if abrege_col in df_wide.columns:
            series = series.fillna(df_wide[abrege_col].astype("string"))
        if nom_col in df_wide.columns and prenom_col in df_wide.columns:
            prenom = df_wide[prenom_col].fillna("").astype(str).str.strip()
            nom = df_wide[nom_col].fillna("").astype(str).str.strip()
            combined = (prenom + " " + nom).str.strip().replace("", pd.NA)
            series = series.fillna(combined)
        elif nom_col in df_wide.columns:
            series = series.fillna(df_wide[nom_col].astype("string"))
        elif prenom_col in df_wide.columns:
            series = series.fillna(df_wide[prenom_col].astype("string"))
        # The unsuffixed name column only ever describes candidate #1.
        if idx == 1 and "nom_candidature" in df_wide.columns:
            series = series.fillna(df_wide["nom_candidature"].astype("string"))
        if series.isna().all():
            return None
        return series

    for idx in sorted(voice_map):
        voix_col = voice_map[idx]
        if voix_col not in df_wide.columns:
            continue
        temp = df_wide[base_cols].copy()
        temp["voix"] = df_wide[voix_col]
        # Candidate code: prefer "Code Nuance N", then "Nuance N".
        code_candidates = []
        if idx in code_map:
            code_candidates.append(code_map[idx])
        if idx in nuance_map and nuance_map[idx] not in code_candidates:
            code_candidates.append(nuance_map[idx])
        code_series = pd.Series(pd.NA, index=df_wide.index, dtype="string")
        for candidate in code_candidates:
            if candidate in df_wide.columns:
                code_series = code_series.fillna(df_wide[candidate])
        temp["code_candidature"] = code_series
        nom_series = _compose_nom(idx)
        if nom_series is not None:
            temp["nom_candidature"] = nom_series
        frames.append(temp)

    if not frames:
        return df
    wide_long = pd.concat(frames, ignore_index=True)
    wide_long["voix"] = pd.to_numeric(wide_long["voix"], errors="coerce")
    # Drop empty candidate slots (NaN or zero votes).
    wide_long = wide_long[wide_long["voix"].notna() & (wide_long["voix"] > 0)]
    return pd.concat([df_long, wide_long], ignore_index=True)
293
+
294
+
295
def _mapping_from_yaml(mapping_path: Path) -> pd.DataFrame:
    """Build the nuance->bloc mapping DataFrame from a YAML config.

    The YAML may provide inline entries ("mapping"), point to a base CSV
    ("base_mapping", resolved relative to the YAML file), and patch rows via
    "overrides" (matched on code_candidature; unknown codes are appended).

    Raises:
        RuntimeError: If PyYAML is not installed.
        ValueError: If the YAML root is not a dict.
    """
    try:
        # Imported lazily so the module works without PyYAML for CSV mappings.
        import yaml
    except Exception as exc:
        raise RuntimeError("PyYAML est requis pour charger un mapping YAML.") from exc
    raw = yaml.safe_load(mapping_path.read_text()) or {}
    if not isinstance(raw, dict):
        raise ValueError("Mapping YAML invalide: attendu un dictionnaire.")

    base_mapping = raw.get("base_mapping")
    mapping_entries = raw.get("mapping")
    overrides = raw.get("overrides", [])

    mapping = pd.DataFrame()
    if mapping_entries:
        mapping = pd.DataFrame(mapping_entries)
    elif base_mapping:
        base_path = Path(base_mapping)
        if not base_path.is_absolute():
            base_path = mapping_path.parent / base_path
        mapping = pd.read_csv(base_path, sep=";")
    else:
        # No source given: start from an empty frame with the expected schema.
        mapping = pd.DataFrame(columns=["code_candidature", "nom_candidature", "bloc_1", "bloc_2", "bloc_3"])

    if overrides:
        override_df = pd.DataFrame(overrides)
        if not override_df.empty:
            # A "blocs" list entry is split into the bloc_1..bloc_3 columns.
            if "blocs" in override_df.columns:
                blocs = override_df["blocs"].apply(lambda v: v if isinstance(v, list) else [])
                override_df["bloc_1"] = blocs.apply(lambda v: v[0] if len(v) > 0 else None)
                override_df["bloc_2"] = blocs.apply(lambda v: v[1] if len(v) > 1 else None)
                override_df["bloc_3"] = blocs.apply(lambda v: v[2] if len(v) > 2 else None)
                override_df = override_df.drop(columns=["blocs"])
            # Accept the short "code"/"nom" aliases used in override entries.
            if "code_candidature" not in override_df.columns and "code" in override_df.columns:
                override_df = override_df.rename(columns={"code": "code_candidature"})
            if "nom_candidature" not in override_df.columns and "nom" in override_df.columns:
                override_df = override_df.rename(columns={"nom": "nom_candidature"})

            # Normalise codes on both sides so the override match is exact.
            if "code_candidature" in mapping.columns:
                mapping["code_candidature"] = _normalize_code_series(mapping["code_candidature"])
            if "code_candidature" in override_df.columns:
                override_df["code_candidature"] = _normalize_code_series(override_df["code_candidature"])

            mapping = mapping.copy()
            for _, row in override_df.iterrows():
                code = row.get("code_candidature")
                if code is None:
                    continue
                # Update matching rows in place; append unknown codes.
                mask = mapping["code_candidature"] == code
                if mask.any():
                    for col in ["nom_candidature", "bloc_1", "bloc_2", "bloc_3"]:
                        if col in row and pd.notna(row[col]):
                            mapping.loc[mask, col] = row[col]
                else:
                    mapping = pd.concat([mapping, pd.DataFrame([row])], ignore_index=True)
    return mapping
351
+
352
+
353
def load_mapping(mapping_path: Path) -> pd.DataFrame:
    """Load the nuance->bloc mapping (YAML or ';'-separated CSV) and normalise it.

    Raises:
        FileNotFoundError: If the mapping file does not exist.
    """
    if not mapping_path.exists():
        raise FileNotFoundError(f"Mapping candidats/blocs manquant : {mapping_path}")
    if mapping_path.suffix in {".yml", ".yaml"}:
        mapping = _mapping_from_yaml(mapping_path)
    else:
        mapping = pd.read_csv(mapping_path, sep=";")
    if "code_candidature" in mapping.columns:
        mapping["code_candidature"] = _normalize_code_series(mapping["code_candidature"])
    # Canonicalise every bloc_* column onto the known category set.
    bloc_columns = [name for name in mapping.columns if name.startswith("bloc")]
    for bloc_col in bloc_columns:
        mapping[bloc_col] = mapping[bloc_col].apply(normalize_category)
    return mapping
366
+
367
+
368
def expand_by_category(elections_long: pd.DataFrame, mapping: pd.DataFrame) -> pd.DataFrame:
    """Explode each candidature row into one row per mapped political bloc.

    Votes are split evenly across a candidature's blocs. Rows whose code has
    no mapping fall back to name-based matching (presidential candidates,
    European list labels) and finally to the "centre" bloc so every vote is
    attributed somewhere.
    """
    df = elections_long.merge(mapping, on="code_candidature", how="left", suffixes=("", "_map"))
    records: list[dict] = []
    for row in df.itertuples(index=False):
        blocs = [getattr(row, col, None) for col in ["bloc_1", "bloc_2", "bloc_3"]]
        # NOTE(review): this filter keeps any non-None value (including NaN
        # floats); normalize_category() then maps unknown values to None.
        blocs = [normalize_category(b) for b in blocs if isinstance(b, str) or b is not None]
        blocs = [b for b in blocs if b is not None]
        voix = getattr(row, "voix", 0) or 0
        exprimes = getattr(row, "exprimes", np.nan)
        votants = getattr(row, "votants", np.nan)
        inscrits = getattr(row, "inscrits", np.nan)
        blancs = getattr(row, "blancs", np.nan)
        nuls = getattr(row, "nuls", np.nan)
        if not blocs:
            # Code-level mapping failed: try matching on candidate/list names.
            election_type = getattr(row, "election_type", None)
            if election_type == "presidentielles":
                nom = getattr(row, "nom_candidature", None)
                mapped = _category_from_name(nom)
                if mapped:
                    blocs = [mapped]
            elif election_type == "europeennes":
                nom = getattr(row, "nom_candidature", None)
                mapped = _category_from_list_name(nom)
                if mapped:
                    blocs = [mapped]
        if not blocs:
            # Explicit fallback: unmapped -> centre (avoids an empty panel).
            blocs = ["centre"]
        # Split the candidature's votes evenly between its blocs.
        part = voix / len(blocs) if len(blocs) > 0 else 0
        for bloc in blocs:
            records.append(
                {
                    "commune_code": getattr(row, "code_commune"),
                    "code_bv": getattr(row, "code_bv"),
                    "election_type": getattr(row, "election_type"),
                    "election_year": int(getattr(row, "election_year")),
                    "round": int(getattr(row, "round")) if not pd.isna(getattr(row, "round")) else None,
                    "date_scrutin": getattr(row, "date_scrutin"),
                    "category": bloc,
                    "voix_cat": part,
                    "exprimes": exprimes,
                    "votants": votants,
                    "inscrits": inscrits,
                    "blancs": blancs,
                    "nuls": nuls,
                }
            )
    return pd.DataFrame.from_records(records)
416
+
417
+
418
def aggregate_by_event(df: pd.DataFrame) -> pd.DataFrame:
    """Sum category votes per election event and derive share/turnout ratios.

    Vote counts are summed per (event, category); the event-level totals
    (exprimes, votants, ...) are taken with "max" since they repeat on every
    category row of the same event.
    """
    group_cols = INDEX_COLS + ["category"]
    agg = df.groupby(group_cols, as_index=False).agg(
        {
            "voix_cat": "sum",
            "exprimes": "max",
            "votants": "max",
            "inscrits": "max",
            "blancs": "max",
            "nuls": "max",
        }
    )
    # Guard divisions: a zero denominator yields NaN instead of inf.
    agg["share"] = agg["voix_cat"] / agg["exprimes"].replace(0, np.nan)
    safe_inscrits = agg["inscrits"].replace(0, np.nan)
    agg["turnout_pct"] = agg["votants"] / safe_inscrits
    agg["blancs_pct"] = agg["blancs"] / safe_inscrits
    agg["nuls_pct"] = agg["nuls"] / safe_inscrits
    return agg
437
+
438
+
439
def compute_national_reference(local: pd.DataFrame) -> pd.DataFrame:
    """Aggregate bureau-level results into per-election national references.

    Returns one row per (election_type, election_year, round, category) with
    the national vote share and turnout, NaN when the denominator is zero.
    """
    keys = ["election_type", "election_year", "round", "category"]
    totals = local.groupby(keys, as_index=False).agg(
        {
            "voix_cat": "sum",
            "exprimes": "sum",
            "votants": "sum",
            "inscrits": "sum",
        }
    )
    safe_exprimes = totals["exprimes"].replace(0, np.nan)
    safe_inscrits = totals["inscrits"].replace(0, np.nan)
    totals["share_nat"] = totals["voix_cat"] / safe_exprimes
    totals["turnout_nat"] = totals["votants"] / safe_inscrits
    return totals[keys + ["share_nat", "turnout_nat"]]
453
+
454
+
455
def add_lags(local: pd.DataFrame) -> pd.DataFrame:
    """Attach lagged shares and deviations-to-national per bureau and category.

    Rows are ordered chronologically first so every shift(k) looks k elections
    back, either across all election types ("any") or within the same type.
    """
    df = local.sort_values("date_scrutin").copy()
    any_keys = ["code_bv", "category"]
    type_keys = ["code_bv", "category", "election_type"]
    df["share_lag_any"] = df.groupby(any_keys)["share"].shift(1)
    df["share_lag2_any"] = df.groupby(any_keys)["share"].shift(2)
    df["share_lag_same_type"] = df.groupby(type_keys)["share"].shift(1)
    # Local over/under-performance versus the national share.
    df["dev_to_nat"] = df["share"] - df["share_nat"]
    df["dev_to_nat_lag_any"] = df.groupby(any_keys)["dev_to_nat"].shift(1)
    df["dev_to_nat_lag_same_type"] = df.groupby(type_keys)["dev_to_nat"].shift(1)
    # Swing = change between the two previous elections (lag1 - lag2).
    df["swing_any"] = df["share_lag_any"] - df["share_lag2_any"]
    return df
465
+
466
+
467
def _pivot_feature(df: pd.DataFrame, value_col: str, prefix: str) -> pd.DataFrame:
    """Pivot one metric to wide form: one `{prefix}{category}` column per bloc."""
    wide = df.pivot_table(index=INDEX_COLS, columns="category", values=value_col)
    # Keep only known categories and tag columns with the feature prefix.
    known = [cat for cat in wide.columns if cat in CANDIDATE_CATEGORIES]
    wide = wide[known]
    wide.columns = [f"{prefix}{cat}" for cat in wide.columns]
    return wide.reset_index()
473
+
474
+
475
def build_panel(
    elections_long_path: Path,
    mapping_path: Path,
    output_path: Path,
    *,
    csv_output: Path | None = None,
) -> pd.DataFrame:
    """Assemble the leakage-free features+targets panel and persist it.

    Pipeline: load long results, expand per bloc, aggregate per event, join
    national references, add lagged features, pivot to wide format, then
    renormalise target shares to sum to 1 before writing parquet (and an
    optional ';'-separated CSV).

    Returns:
        The assembled panel DataFrame.
    """
    elections_long = load_elections_long(elections_long_path)
    mapping = load_mapping(mapping_path)
    expanded = expand_by_category(elections_long, mapping)
    local = aggregate_by_event(expanded)

    nat = compute_national_reference(local)
    local = local.merge(nat, on=["election_type", "election_year", "round", "category"], how="left")
    local = add_lags(local)

    # Event-level turnout and its lagged versions (any type / same type).
    turnout_event = (
        local.groupby(INDEX_COLS, as_index=False)["turnout_pct"].max().sort_values("date_scrutin")
    )
    turnout_event["prev_turnout_any_lag1"] = turnout_event.groupby("code_bv")["turnout_pct"].shift(1)
    turnout_event["prev_turnout_same_type_lag1"] = turnout_event.groupby(["code_bv", "election_type"])[
        "turnout_pct"
    ].shift(1)

    # One wide frame per feature family, merged on the event index columns.
    datasets: List[pd.DataFrame] = [
        _pivot_feature(local, "share", "target_share_"),
        _pivot_feature(local, "share_lag_any", "prev_share_any_lag1_"),
        _pivot_feature(local, "share_lag_same_type", "prev_share_type_lag1_"),
        _pivot_feature(local, "dev_to_nat_lag_any", "prev_dev_to_national_any_lag1_"),
        _pivot_feature(local, "dev_to_nat_lag_same_type", "prev_dev_to_national_type_lag1_"),
        _pivot_feature(local, "swing_any", "swing_any_"),
    ]
    panel = reduce(lambda left, right: left.merge(right, on=INDEX_COLS, how="left"), datasets)
    panel = panel.merge(
        turnout_event[INDEX_COLS + ["turnout_pct", "prev_turnout_any_lag1", "prev_turnout_same_type_lag1"]],
        on=INDEX_COLS,
        how="left",
    )

    # Ensure every category target exists, clamp to [0, 1], then renormalise
    # rows with positive mass so their target shares sum to 1.
    target_cols = [f"target_share_{c}" for c in CANDIDATE_CATEGORIES]
    for col in target_cols:
        if col not in panel.columns:
            panel[col] = 0.0
    panel[target_cols] = panel[target_cols].fillna(0).clip(lower=0, upper=1)
    panel["target_sum_before_renorm"] = panel[target_cols].sum(axis=1)
    has_mass = panel["target_sum_before_renorm"] > 0
    panel.loc[has_mass, target_cols] = panel.loc[has_mass, target_cols].div(
        panel.loc[has_mass, "target_sum_before_renorm"], axis=0
    )
    panel["target_sum_after_renorm"] = panel[target_cols].sum(axis=1)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    panel.to_parquet(output_path, index=False)
    if csv_output:
        panel.to_csv(csv_output, sep=";", index=False)
    LOGGER.info("Panel enregistré dans %s (%s lignes)", output_path, len(panel))
    return panel
532
+
533
+
534
def parse_args() -> argparse.Namespace:
    """Parse the CLI options of the panel-building script."""
    parser = argparse.ArgumentParser(description="Construction du dataset panel features+cibles sans fuite temporelle.")
    # (flag, default path, help) — all options are Path-typed.
    path_options = [
        ("--elections-long", Path("data/interim/elections_long.parquet"), "Chemin du format long harmonisé."),
        ("--mapping", Path("config/nuances.yaml"), "Mapping nuance -> catégorie."),
        ("--output", Path("data/processed/panel.parquet"), "Destination du parquet panel."),
        ("--output-csv", Path("data/processed/panel.csv"), "Destination CSV optionnelle."),
    ]
    for flag, default, help_text in path_options:
        parser.add_argument(flag, type=Path, default=default, help=help_text)
    return parser.parse_args()
561
+
562
+
563
def main() -> None:
    """CLI entry point: configure logging, parse arguments, build the panel."""
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    args = parse_args()
    build_panel(args.elections_long, args.mapping, args.output, csv_output=args.output_csv)


if __name__ == "__main__":
    main()
src/model/predict.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Dict, List
8
+
9
+ import joblib
10
+ import numpy as np
11
+ import pandas as pd
12
+
13
+ from src.constants import CANDIDATE_CATEGORIES
14
+ from src.features.build_features import (
15
+ aggregate_by_event,
16
+ compute_national_reference,
17
+ expand_by_category,
18
+ load_elections_long,
19
+ load_mapping,
20
+ )
21
+
22
+ LOGGER = logging.getLogger(__name__)
23
+
24
+
25
+ def filter_history(df: pd.DataFrame, target_year: int, commune_code: str | None) -> pd.DataFrame:
26
+ df = df[df["annee"] < target_year]
27
+ if commune_code:
28
+ df = df[df["code_commune"] == commune_code]
29
+ return df
30
+
31
+
32
def build_feature_matrix(
    elections_long: pd.DataFrame,
    mapping: pd.DataFrame,
    target_type: str,
    target_year: int,
) -> pd.DataFrame:
    """Build one feature row per bureau for the target (future) election.

    Features mirror the training panel's lag features: the latest observed
    share / deviation-to-national per (bureau, category) across all election
    types and restricted to ``target_type``, the swing between the last two
    events, and the latest turnout figures.
    """
    expanded = expand_by_category(elections_long, mapping)
    local = aggregate_by_event(expanded)
    nat = compute_national_reference(local)
    local = local.merge(nat, on=["election_type", "election_year", "round", "category"], how="left")
    local["dev_to_nat"] = local["share"] - local["share_nat"]
    local = local.sort_values("date_scrutin")

    # Latest observed value per (bureau, category), all election types mixed.
    last_any_share = (
        local.sort_values("date_scrutin").groupby(["code_bv", "category"])["share"].last()
    )
    last_any_dev = (
        local.sort_values("date_scrutin").groupby(["code_bv", "category"])["dev_to_nat"].last()
    )
    # Same, restricted to the target election type.
    last_type_share = (
        local[local["election_type"] == target_type]
        .sort_values("date_scrutin")
        .groupby(["code_bv", "category"])["share"]
        .last()
    )
    last_type_dev = (
        local[local["election_type"] == target_type]
        .sort_values("date_scrutin")
        .groupby(["code_bv", "category"])["dev_to_nat"]
        .last()
    )
    # Swing between the last two elections, all types mixed.
    swing_any = (
        local.groupby(["code_bv", "category"])["share"]
        .apply(lambda s: s.iloc[-1] - s.iloc[-2] if len(s) >= 2 else np.nan)
        .rename("swing_any")
    )

    turnout_any = local.groupby("code_bv")["turnout_pct"].last()
    turnout_type = (
        local[local["election_type"] == target_type]
        .sort_values("date_scrutin")
        .groupby("code_bv")["turnout_pct"]
        .last()
    )

    bureaux = sorted(local["code_bv"].dropna().unique())
    records: List[dict] = []
    for code_bv in bureaux:
        record = {
            # code_bv is a "commune-bureau" composite key.
            "commune_code": str(code_bv).split("-")[0],
            "code_bv": code_bv,
            "election_type": target_type,
            "election_year": target_year,
            "round": 1,
            # Placeholder date for the future election (January 1st).
            "date_scrutin": f"{target_year}-01-01",
            "prev_turnout_any_lag1": turnout_any.get(code_bv, np.nan),
            "prev_turnout_same_type_lag1": turnout_type.get(code_bv, np.nan),
        }
        for cat in CANDIDATE_CATEGORIES:
            record[f"prev_share_any_lag1_{cat}"] = last_any_share.get((code_bv, cat), np.nan)
            record[f"prev_share_type_lag1_{cat}"] = last_type_share.get((code_bv, cat), np.nan)
            record[f"prev_dev_to_national_any_lag1_{cat}"] = last_any_dev.get((code_bv, cat), np.nan)
            record[f"prev_dev_to_national_type_lag1_{cat}"] = last_type_dev.get((code_bv, cat), np.nan)
            record[f"swing_any_{cat}"] = swing_any.get((code_bv, cat), np.nan)
        records.append(record)
    return pd.DataFrame.from_records(records)
99
+
100
+
101
def compute_references(local: pd.DataFrame, target_year: int) -> dict[str, dict[tuple[str, str], float]]:
    """Build reference shares keyed by (code_bv, category) for delta reporting.

    "leg": the latest legislative result strictly before ``target_year``;
    "mun2020": the 2020 municipal result.
    (Annotation corrected: inner dicts are keyed by (code_bv, category) tuples.)
    """
    refs: dict[str, dict[tuple[str, str], float]] = {}
    leg = (
        local[(local["election_type"] == "legislatives") & (local["election_year"] < target_year)]
        .sort_values("date_scrutin")
        .groupby(["code_bv", "category"])
        .last()
    )
    mun2020 = (
        local[(local["election_type"] == "municipales") & (local["election_year"] == 2020)]
        .sort_values("date_scrutin")
        .groupby(["code_bv", "category"])
        .last()
    )
    refs["leg"] = {(code_bv, cat): row["share"] for (code_bv, cat), row in leg.iterrows()}
    refs["mun2020"] = {(code_bv, cat): row["share"] for (code_bv, cat), row in mun2020.iterrows()}
    return refs
118
+
119
+
120
def load_feature_columns(path: Path, df: pd.DataFrame) -> List[str]:
    """Load the trained feature-column order from JSON, with a fallback.

    When the JSON file is missing, every column of ``df`` that is not an
    identifier is treated as a feature.
    """
    if path.exists():
        return json.loads(path.read_text())
    identifier_cols = {"commune_code", "code_bv", "election_type", "election_year", "round", "date_scrutin"}
    return [col for col in df.columns if col not in identifier_cols]
126
+
127
+
128
def predict(
    model_path: Path,
    feature_df: pd.DataFrame,
    feature_cols: List[str],
    refs: Dict[str, Dict[str, float]],
) -> pd.DataFrame:
    """Run the trained model and compute percent shares plus reference deltas.

    Args:
        model_path: Joblib file holding the fitted multi-output regressor.
        feature_df: One row per bureau, as built by ``build_feature_matrix``.
        feature_cols: Ordered feature columns the model was trained with.
        refs: Reference shares keyed by (code_bv, category), per reference name.

    Returns:
        One row per bureau with ``predicted_share_<cat>`` percentages and
        ``delta_leg_<cat>`` / ``delta_mun2020_<cat>`` deltas ("N/A" when no
        reference exists for that bureau/category).
    """
    model = joblib.load(model_path)
    # Work on a copy: aligning columns must not mutate the caller's frame.
    feature_df = feature_df.copy()
    # Align feature set with trained columns (add missing as NaN).
    for col in feature_cols:
        if col not in feature_df.columns:
            feature_df[col] = np.nan
    preds = np.clip(model.predict(feature_df[feature_cols]), 0, 1)
    # Renormalise each row to sum to 1 (guarding all-zero rows), then to %.
    sums = preds.sum(axis=1, keepdims=True)
    sums[sums == 0] = 1
    preds_pct = preds / sums * 100

    rows = []
    # BUGFIX: enumerate row positions explicitly. The original indexed the
    # prediction array with the DataFrame label from iterrows(), which is
    # only correct for a default 0..n-1 index and breaks after filtering.
    for pos, (_, row) in enumerate(feature_df.iterrows()):
        code_bv = row["code_bv"]
        record = {
            "commune_code": row["commune_code"],
            "code_bv": code_bv,
        }
        for cat_idx, cat in enumerate(CANDIDATE_CATEGORIES):
            pred_val = preds_pct[pos, cat_idx]
            record[f"predicted_share_{cat}"] = round(float(pred_val), 2)
            leg_ref = refs["leg"].get((code_bv, cat))
            mun_ref = refs["mun2020"].get((code_bv, cat))
            record[f"delta_leg_{cat}"] = "N/A" if leg_ref is None else round(float(pred_val - leg_ref * 100), 2)
            record[f"delta_mun2020_{cat}"] = "N/A" if mun_ref is None else round(float(pred_val - mun_ref * 100), 2)
        rows.append(record)
    return pd.DataFrame(rows)
162
+
163
+
164
def main() -> None:
    """CLI entry point: build features from history and write bureau predictions."""
    parser = argparse.ArgumentParser(description="Prédictions bureau par bureau pour une échéance cible.")
    parser.add_argument("--model-path", type=Path, default=Path("models/hist_gradient_boosting.joblib"), help="Modèle entraîné.")
    parser.add_argument("--feature-columns", type=Path, default=Path("models/feature_columns.json"), help="Colonnes de features attendues.")
    parser.add_argument("--elections-long", type=Path, default=Path("data/interim/elections_long.parquet"), help="Historique long.")
    parser.add_argument("--mapping", type=Path, default=Path("config/nuances.yaml"), help="Mapping nuances->catégories.")
    parser.add_argument("--target-election-type", type=str, default="municipales", help="Type d'élection cible.")
    parser.add_argument("--target-year", type=int, default=2026, help="Année cible.")
    parser.add_argument("--commune-code", type=str, default="34301", help="Code commune à filtrer (Sete=34301).")
    parser.add_argument("--output-dir", type=Path, default=Path("predictions"), help="Répertoire de sortie.")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

    # Only data strictly before the target year feeds the features (no leakage).
    elections_long = load_elections_long(args.elections_long)
    elections_long = filter_history(elections_long, args.target_year, args.commune_code)
    mapping = load_mapping(args.mapping)

    feature_df = build_feature_matrix(elections_long, mapping, args.target_election_type, args.target_year)
    if feature_df.empty:
        raise RuntimeError("Aucune donnée historique disponible pour construire les features.")
    feature_cols = load_feature_columns(args.feature_columns, feature_df)
    # Reference shares (latest legislatives / municipales 2020) for the deltas.
    refs = compute_references(
        aggregate_by_event(expand_by_category(elections_long, mapping)).assign(
            election_type=lambda d: d["election_type"]
        ),
        args.target_year,
    )
    preds_df = predict(args.model_path, feature_df, feature_cols, refs)

    args.output_dir.mkdir(parents=True, exist_ok=True)
    output_path = args.output_dir / f"pred_{args.target_election_type}_{args.target_year}_sete.csv"
    preds_df.to_csv(output_path, index=False)
    LOGGER.info("Prédictions écrites dans %s", output_path)


if __name__ == "__main__":
    main()
src/model/train.py ADDED
@@ -0,0 +1,666 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import logging
6
+ import sys
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Dict, List, Optional, Tuple
10
+
11
+ import joblib
12
+ import numpy as np
13
+ import pandas as pd
14
+ from sklearn.compose import ColumnTransformer
15
+ from sklearn.base import BaseEstimator, RegressorMixin, clone
16
+ from sklearn.ensemble import HistGradientBoostingClassifier, HistGradientBoostingRegressor
17
+ from sklearn.impute import SimpleImputer
18
+ from sklearn.linear_model import Ridge
19
+ from sklearn.metrics import (
20
+ explained_variance_score,
21
+ mean_absolute_error,
22
+ mean_squared_error,
23
+ median_absolute_error,
24
+ r2_score,
25
+ )
26
+ from sklearn.model_selection import TimeSeriesSplit
27
+ from sklearn.multioutput import MultiOutputRegressor
28
+ from sklearn.pipeline import Pipeline
29
+ from sklearn.preprocessing import StandardScaler
30
+ from sklearn.utils.validation import check_is_fitted
31
+
32
+ # Ensure project root is on sys.path when running as a script
33
+ PROJECT_ROOT = Path(__file__).resolve().parents[2]
34
+ if str(PROJECT_ROOT) not in sys.path:
35
+ sys.path.append(str(PROJECT_ROOT))
36
+
37
+ from src.constants import CANDIDATE_CATEGORIES
38
+
39
+ LOGGER = logging.getLogger(__name__)
40
+
41
# One regression target per political bloc.
TARGET_COLS = [f"target_share_{c}" for c in CANDIDATE_CATEGORIES]
# Identifier / bookkeeping columns that must never be fed to the model.
META_COLS = [
    "commune_code",
    "code_bv",
    "election_type",
    "election_year",
    "round",
    "date_scrutin",
    "target_sum_before_renorm",
    "target_sum_after_renorm",
]

# Hyper-parameter candidates explored per model family during selection.
MODEL_GRIDS: Dict[str, List[Dict[str, object]]] = {
    "ridge": [
        {"alpha": 0.1},
        {"alpha": 1.0},
        {"alpha": 10.0},
        {"alpha": 50.0},
    ],
    "hist_gradient_boosting": [
        {"max_depth": 3, "learning_rate": 0.08, "max_iter": 400, "min_samples_leaf": 30, "l2_regularization": 0.1},
        {"max_depth": 4, "learning_rate": 0.05, "max_iter": 600, "min_samples_leaf": 20, "l2_regularization": 0.1},
        {"max_depth": 4, "learning_rate": 0.1, "max_iter": 300, "min_samples_leaf": 50, "l2_regularization": 1.0},
        {"max_depth": 6, "learning_rate": 0.05, "max_iter": 500, "min_samples_leaf": 40, "l2_regularization": 0.5},
        {"max_depth": 3, "learning_rate": 0.05, "max_iter": 500, "min_samples_leaf": 80, "l2_regularization": 1.0},
        {"max_depth": 3, "learning_rate": 0.04, "max_iter": 600, "min_samples_leaf": 120, "l2_regularization": 2.0},
        {"max_depth": 2, "learning_rate": 0.08, "max_iter": 500, "min_samples_leaf": 150, "l2_regularization": 3.0},
    ],
    "lightgbm": [
        {"n_estimators": 600, "learning_rate": 0.05, "num_leaves": 31, "subsample": 0.8, "colsample_bytree": 0.8},
        {"n_estimators": 400, "learning_rate": 0.08, "num_leaves": 16, "min_child_samples": 30, "subsample": 0.7, "colsample_bytree": 0.7},
    ],
    "xgboost": [
        {"n_estimators": 600, "learning_rate": 0.05, "max_depth": 6, "subsample": 0.8, "colsample_bytree": 0.8},
        {"n_estimators": 400, "learning_rate": 0.08, "max_depth": 4, "subsample": 0.7, "colsample_bytree": 0.7},
    ],
    # Two-stage HGB: a classifier decides presence, a regressor predicts the
    # (optionally logit-transformed) share; probabilities may gate the output.
    "two_stage_hgb": [
        {
            "clf_params": {"max_depth": 3, "learning_rate": 0.08, "max_iter": 300, "min_samples_leaf": 30, "l2_regularization": 0.1},
            "reg_params": {"max_depth": 3, "learning_rate": 0.08, "max_iter": 400, "min_samples_leaf": 30, "l2_regularization": 0.1},
            "epsilon": 1e-4,
            "use_logit": True,
            "use_proba": True,
        },
        {
            "clf_params": {"max_depth": 2, "learning_rate": 0.1, "max_iter": 300, "min_samples_leaf": 60, "l2_regularization": 0.2},
            "reg_params": {"max_depth": 2, "learning_rate": 0.08, "max_iter": 500, "min_samples_leaf": 60, "l2_regularization": 0.5},
            "epsilon": 1e-4,
            "use_logit": True,
            "use_proba": True,
        },
    ],
    "catboost": [
        {"depth": 6, "learning_rate": 0.05, "iterations": 500},
        {"depth": 4, "learning_rate": 0.08, "iterations": 400},
    ],
}
+ }
98
+
99
+
100
@dataclass
class SplitConfig:
    """Year boundaries defining the temporal train/valid/test split."""

    # Last election year included in the training set.
    train_end_year: int
    # Last election year included in the validation set.
    valid_end_year: int
    # First election year of the test set (inclusive).
    test_start_year: int
105
+
106
+
107
def load_panel(path: Path) -> pd.DataFrame:
    """Read the modelling panel from disk.

    ``.parquet`` files are read natively; any other extension is treated as a
    ';'-separated CSV. ``election_year`` and ``round`` are coerced to numeric
    (invalid values become NaN).

    Raises FileNotFoundError when the file does not exist.
    """
    if not path.exists():
        raise FileNotFoundError(f"Panel introuvable : {path}")
    if path.suffix == ".parquet":
        panel = pd.read_parquet(path)
    else:
        panel = pd.read_csv(path, sep=";")
    for column in ("election_year", "round"):
        panel[column] = pd.to_numeric(panel[column], errors="coerce")
    return panel
117
+
118
+
119
def get_feature_columns(df: pd.DataFrame) -> List[str]:
    """Return the numeric, non-target, non-metadata columns usable as features."""
    reserved = set(TARGET_COLS) | set(META_COLS)
    return [
        col
        for col in df.columns
        if col not in reserved and pd.api.types.is_numeric_dtype(df[col])
    ]
124
+
125
+
126
def temporal_split(df: pd.DataFrame, cfg: SplitConfig) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Split the panel by election_year into (train, valid, test) frames.

    Train covers years <= train_end_year, valid the half-open range
    (train_end_year, valid_end_year], test everything >= test_start_year.
    """
    years = df["election_year"]
    train = df[years <= cfg.train_end_year]
    valid = df[years.between(cfg.train_end_year, cfg.valid_end_year, inclusive="right")]
    test = df[years >= cfg.test_start_year]
    return train, valid, test
131
+
132
+
133
def make_preprocessor(feature_cols: List[str]) -> ColumnTransformer:
    """Build the numeric preprocessing step: median imputation + scaling.

    Columns outside ``feature_cols`` are dropped (remainder="drop").
    """
    numeric_pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="median")),
            ("scaler", StandardScaler()),
        ]
    )
    return ColumnTransformer(
        transformers=[("num", numeric_pipeline, feature_cols)],
        remainder="drop",
    )
140
+
141
+
142
def normalize_predictions(y_pred: np.ndarray) -> np.ndarray:
    """Clip predicted shares to [0, 1] and renormalise each row to sum to 1.

    Rows whose clipped values sum to 0 are left as all zeros rather than
    dividing by zero.
    """
    clipped = np.clip(y_pred, 0, 1)
    row_sums = clipped.sum(axis=1, keepdims=True)
    # Dividing an all-zero row by 1 leaves it unchanged (all zeros).
    safe_sums = np.where(row_sums == 0, 1, row_sums)
    return clipped / safe_sums
147
+
148
+
149
def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> Dict[str, float]:
    """Compute share-regression metrics on renormalised predictions.

    ``y_pred`` is clipped/renormalised (normalize_predictions) before any
    scoring, so all metrics describe the post-processed model output.
    Global metrics are computed on the flattened (sample x category) values;
    per-category MAE is appended as ``mae_<category>``. ``winner_accuracy``
    is the share of rows where argmax matches between truth and prediction.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    y_pred = normalize_predictions(y_pred)
    y_true_flat = y_true.reshape(-1)
    y_pred_flat = y_pred.reshape(-1)
    mae = float(mean_absolute_error(y_true_flat, y_pred_flat))
    rmse = float(np.sqrt(mean_squared_error(y_true_flat, y_pred_flat)))
    medae = float(median_absolute_error(y_true_flat, y_pred_flat))
    # R2 / explained variance are undefined on a single value -> NaN.
    r2 = float(r2_score(y_true_flat, y_pred_flat)) if len(y_true_flat) > 1 else np.nan
    evs = float(explained_variance_score(y_true_flat, y_pred_flat)) if len(y_true_flat) > 1 else np.nan
    denom = float(np.sum(np.abs(y_true_flat)))
    # WAPE is undefined when all true values are 0 -> NaN.
    wape = float(np.sum(np.abs(y_true_flat - y_pred_flat)) / denom) if denom > 0 else np.nan
    # 1e-9 guards the sMAPE denominator against 0/0 rows.
    smape = float(np.mean(2 * np.abs(y_pred_flat - y_true_flat) / (np.abs(y_true_flat) + np.abs(y_pred_flat) + 1e-9)))
    # Positive bias means the model over-predicts on average.
    bias = float(np.mean(y_pred_flat - y_true_flat))
    winner_true = np.argmax(y_true, axis=1)
    winner_pred = np.argmax(y_pred, axis=1)
    winner_acc = float(np.mean(winner_true == winner_pred)) if len(winner_true) else np.nan
    metrics = {
        "mae_mean": mae,
        "rmse": rmse,
        "medae": medae,
        "r2": r2,
        "explained_var": evs,
        "wape": wape,
        "smape": smape,
        "bias": bias,
        "winner_accuracy": winner_acc,
    }
    # Per-category MAE, column order follows CANDIDATE_CATEGORIES.
    for idx, cat in enumerate(CANDIDATE_CATEGORIES):
        metrics[f"mae_{cat}"] = float(mean_absolute_error(y_true[:, idx], y_pred[:, idx]))
    return metrics
181
+
182
+
183
def build_event_folds(df: pd.DataFrame, n_splits: int) -> List[Tuple[np.ndarray, np.ndarray]]:
    """Build expanding-window CV folds grouped by election event.

    An "event" is a (election_type, election_year, round) triple; events are
    ordered chronologically by ``date_scrutin`` (falling back to the election
    year when all dates are missing) and split with TimeSeriesSplit, so a
    fold never trains on events later than its test events.

    Returns a list of (train_idx, test_idx) pairs of **positional** indices,
    suitable for ``df.iloc``, or [] when fewer than two events exist.
    """
    if df.empty:
        return []
    # Work on a positionally re-indexed copy: callers consume the returned
    # indices with .iloc, so they must be positions. The previous version
    # returned ``work.index`` labels, which was only correct when df happened
    # to carry a default RangeIndex.
    work = df.reset_index(drop=True)
    work["date_scrutin"] = pd.to_datetime(work.get("date_scrutin"), errors="coerce")
    if work["date_scrutin"].isna().all():
        # No usable dates at all: order events by election year instead.
        work["date_scrutin"] = pd.to_datetime(work["election_year"], format="%Y", errors="coerce")
    work["event_key"] = (
        work["election_type"].astype(str).str.lower().str.strip()
        + "|"
        + work["election_year"].astype(str)
        + "|"
        + work["round"].astype(str)
    )
    events = (
        work[["event_key", "date_scrutin"]]
        .dropna(subset=["event_key", "date_scrutin"])
        .drop_duplicates()
        .sort_values("date_scrutin")
        .reset_index(drop=True)
    )
    if len(events) < 2:
        return []
    # Never request more splits than there are candidate held-out events.
    max_splits = min(n_splits, len(events) - 1)
    tscv = TimeSeriesSplit(n_splits=max_splits)
    folds = []
    for train_evt_idx, test_evt_idx in tscv.split(events):
        train_keys = set(events.iloc[train_evt_idx]["event_key"])
        test_keys = set(events.iloc[test_evt_idx]["event_key"])
        train_idx = np.flatnonzero(work["event_key"].isin(train_keys).to_numpy())
        test_idx = np.flatnonzero(work["event_key"].isin(test_keys).to_numpy())
        folds.append((train_idx, test_idx))
    return folds
216
+
217
+
218
class TwoStageRegressor(BaseEstimator, RegressorMixin):
    """Hurdle-style regressor for a single share target.

    Stage 1 (classifier) predicts whether the target is positive
    (> ``epsilon``); stage 2 (regressor) is fitted on positive samples only,
    optionally in logit space. At prediction time the stages are combined
    either multiplicatively (``use_proba=True``: P(positive) * regressed
    value) or by hard thresholding at ``positive_threshold``.

    Targets are treated as proportions: with ``use_logit`` they are clipped
    into (logit_eps, 1 - logit_eps) before the logit transform.
    """

    def __init__(
        self,
        classifier: Optional[BaseEstimator] = None,
        regressor: Optional[BaseEstimator] = None,
        epsilon: float = 1e-4,
        positive_threshold: float = 0.5,
        use_proba: bool = True,
        use_logit: bool = True,
        logit_eps: float = 1e-6,
    ) -> None:
        # sklearn convention: store params as-is so clone()/get_params() work.
        self.classifier = classifier
        self.regressor = regressor
        self.epsilon = epsilon
        self.positive_threshold = positive_threshold
        self.use_proba = use_proba
        self.use_logit = use_logit
        self.logit_eps = logit_eps

    def _default_classifier(self) -> BaseEstimator:
        # Fallback stage-1 model when none is supplied.
        return HistGradientBoostingClassifier(random_state=42)

    def _default_regressor(self) -> BaseEstimator:
        # Fallback stage-2 model when none is supplied.
        return HistGradientBoostingRegressor(random_state=42)

    def fit(self, X, y):
        """Fit the classifier on (y > epsilon) and the regressor on positives."""
        y = np.asarray(y).ravel()
        mask_pos = y > self.epsilon

        # Degenerate case: every sample on the same side -> nothing for the
        # classifier to learn; remember the constant positive rate instead.
        self._constant_proba = None
        if mask_pos.all() or (~mask_pos).all():
            self._constant_proba = float(mask_pos.mean())
            self.classifier_ = None
        else:
            classifier = self.classifier if self.classifier is not None else self._default_classifier()
            self.classifier_ = clone(classifier)
            self.classifier_.fit(X, mask_pos.astype(int))

        self.regressor_ = None
        if mask_pos.any():
            regressor = self.regressor if self.regressor is not None else self._default_regressor()
            self.regressor_ = clone(regressor)
            y_reg = y[mask_pos]
            if self.use_logit:
                # Regress in logit space; the clip keeps log() finite at 0/1.
                y_reg = np.clip(y_reg, self.logit_eps, 1 - self.logit_eps)
                y_reg = np.log(y_reg / (1 - y_reg))
            self.regressor_.fit(X[mask_pos], y_reg)
        return self

    def predict(self, X):
        """Combine the stage-1 probability with the stage-2 regression output.

        Requires a prior call to fit() (``_constant_proba`` is set there).
        """
        if self._constant_proba is not None:
            proba = np.full(len(X), self._constant_proba, dtype=float)
        else:
            check_is_fitted(self, ["classifier_"])
            if self.use_proba and hasattr(self.classifier_, "predict_proba"):
                proba = self.classifier_.predict_proba(X)[:, 1]  # type: ignore
            else:
                # Classifier without predict_proba: fall back to hard labels.
                proba = self.classifier_.predict(X)  # type: ignore
        proba = np.asarray(proba, dtype=float)

        if self.regressor_ is None:
            # No positive sample seen during fit -> predict 0 everywhere.
            reg_pred = np.zeros(len(proba), dtype=float)
        else:
            reg_pred = np.asarray(self.regressor_.predict(X), dtype=float)
            if self.use_logit:
                # Invert the logit transform applied in fit().
                reg_pred = 1 / (1 + np.exp(-reg_pred))
            reg_pred = np.clip(reg_pred, 0, 1)

        if self.use_proba:
            preds = proba * reg_pred
        else:
            preds = np.where(proba >= self.positive_threshold, reg_pred, 0.0)
        return preds
291
+
292
+
293
class CatBoostRegressorWrapper(BaseEstimator, RegressorMixin):
    """Thin sklearn-compatible wrapper around ``catboost.CatBoostRegressor``.

    CatBoost is imported lazily inside ``fit`` so the module loads without
    the dependency installed. ``get_params``/``set_params`` expose the raw
    CatBoost keyword arguments so sklearn cloning works with CatBoost
    versions lacking the modern tag API.
    """

    def __init__(self, **params: float | int | str):
        self.params = dict(params)
        self.model_ = None

    def fit(self, X, y, **fit_params):
        from catboost import CatBoostRegressor

        booster = CatBoostRegressor(**self.params)  # type: ignore
        booster.fit(X, y, **fit_params)
        self.model_ = booster
        return self

    def predict(self, X):
        """Predict with the fitted booster; raises if fit() was never called."""
        if self.model_ is None:
            raise ValueError("CatBoostRegressorWrapper n'est pas entraîné.")
        return self.model_.predict(X)

    def get_params(self, deep: bool = True):
        # Return a copy so callers cannot mutate our stored params.
        return dict(self.params)

    def set_params(self, **params):
        self.params.update(params)
        return self
316
+
317
+
318
def make_model(model_name: str, feature_cols: List[str], params: Dict[str, object]) -> Optional[Pipeline]:
    """Build the preprocessing + multi-output regression pipeline for ``model_name``.

    Optional backends (lightgbm, xgboost, catboost) are probed with a local
    import; when the library is missing the function logs and returns None so
    callers can simply skip that grid entry. Raises ValueError for an unknown
    model name.
    """
    preprocessor = make_preprocessor(feature_cols)
    if model_name == "ridge":
        estimator = Ridge(**params)  # type: ignore
    elif model_name == "hist_gradient_boosting":
        estimator = HistGradientBoostingRegressor(random_state=42, **params)  # type: ignore
    elif model_name == "lightgbm":
        try:
            from lightgbm import LGBMRegressor
        except Exception:
            LOGGER.info("LightGBM indisponible, ignoré.")
            return None
        estimator = LGBMRegressor(random_state=42, force_row_wise=True, verbosity=-1, **params)  # type: ignore
    elif model_name == "xgboost":
        try:
            from xgboost import XGBRegressor
        except Exception:
            LOGGER.info("XGBoost indisponible, ignoré.")
            return None
        estimator = XGBRegressor(random_state=42, **params)
    elif model_name == "two_stage_hgb":
        # Grid entries for this model nest the per-stage parameter dicts.
        clf_params = params.get("clf_params", {})
        reg_params = params.get("reg_params", {})
        estimator = TwoStageRegressor(
            classifier=HistGradientBoostingClassifier(random_state=42, **clf_params),  # type: ignore
            regressor=HistGradientBoostingRegressor(random_state=42, **reg_params),  # type: ignore
            epsilon=params.get("epsilon", 1e-4),  # type: ignore
            positive_threshold=params.get("positive_threshold", 0.5),  # type: ignore
            use_proba=bool(params.get("use_proba", True)),
            use_logit=bool(params.get("use_logit", True)),
            logit_eps=params.get("logit_eps", 1e-6),  # type: ignore
        )
    elif model_name == "catboost":
        try:
            from catboost import CatBoostRegressor
        except Exception:
            LOGGER.info("CatBoost indisponible, ignoré.")
            return None
        # Older CatBoost versions lack __sklearn_tags__ and break when cloned
        # by sklearn; route those through the compatibility wrapper.
        if not hasattr(CatBoostRegressor, "__sklearn_tags__"):
            estimator = CatBoostRegressorWrapper(verbose=0, random_state=42, **params)  # type: ignore
        else:
            estimator = CatBoostRegressor(verbose=0, random_state=42, **params)  # type: ignore
    else:
        raise ValueError(f"Modèle inconnu: {model_name}")
    # n_jobs=1 to avoid process-based parallelism issues in some environments.
    model = MultiOutputRegressor(estimator, n_jobs=1)  # type: ignore
    return Pipeline(
        steps=[
            ("preprocess", preprocessor),
            ("model", model),
        ]
    )
370
+
371
+
372
def evaluate(model: Pipeline, X, y_true: np.ndarray) -> Dict[str, float]:
    """Score ``model`` on (X, y_true); returns {"mae_mean": nan} for empty X."""
    if X is None or len(X) == 0:
        return {"mae_mean": np.nan}
    predictions = model.predict(X)
    return regression_metrics(y_true, predictions)  # type: ignore
377
+
378
+
379
def evaluate_cv(
    model: Pipeline,
    df: pd.DataFrame,
    feature_cols: List[str],
    n_splits: int,
    target_cols: List[str],
) -> Dict[str, float]:
    """Run event-wise temporal cross-validation and average fold metrics.

    Each fold refits a clone of ``model``. Metric names are returned with a
    ``cv_`` prefix plus a ``folds_used`` count; {"folds_used": 0} when no
    fold can be built.
    """
    folds = build_event_folds(df, n_splits)
    if not folds:
        return {"folds_used": 0}
    collected: Dict[str, list[float]] = {}
    for fit_idx, holdout_idx in folds:
        fold_model = clone(model)
        fold_model.fit(df.iloc[fit_idx][feature_cols], df.iloc[fit_idx][target_cols].values)
        fold_scores = evaluate(
            fold_model,
            df.iloc[holdout_idx][feature_cols],
            df.iloc[holdout_idx][target_cols].values,
        )
        for metric_name, value in fold_scores.items():
            collected.setdefault(metric_name, []).append(value)
    summary = {f"cv_{name}": float(np.nanmean(values)) for name, values in collected.items()}
    summary["folds_used"] = len(folds)
    return summary
403
+
404
+
405
def compute_cv_residual_intervals(
    model: Pipeline,
    df: pd.DataFrame,
    feature_cols: List[str],
    target_cols: List[str],
    n_splits: int,
    quantiles: Tuple[float, ...] = (0.05, 0.1, 0.9, 0.95),
) -> Dict[str, object]:
    """Estimate per-category residual quantiles via event-wise temporal CV.

    For each fold a clone of ``model`` is refitted and out-of-fold residuals
    (normalised prediction minus truth) are pooled per candidate category;
    the requested quantiles plus mean/std/count are then summarised.
    Returns an empty "residuals" payload when no folds can be built.
    """
    folds = build_event_folds(df, n_splits)
    if not folds:
        return {"folds_used": 0, "quantiles": list(quantiles), "residuals": {}}

    residuals_by_cat: Dict[str, list[float]] = {cat: [] for cat in CANDIDATE_CATEGORIES}
    for train_idx, test_idx in folds:
        model_clone = clone(model)
        X_train = df.iloc[train_idx][feature_cols]
        y_train = df.iloc[train_idx][target_cols].values
        X_test = df.iloc[test_idx][feature_cols]
        y_test = df.iloc[test_idx][target_cols].values
        model_clone.fit(X_train, y_train)
        y_pred = model_clone.predict(X_test)
        # Residuals are measured on the renormalised (deliverable) output.
        y_pred = normalize_predictions(y_pred)
        resid = y_pred - y_test
        for idx, cat in enumerate(CANDIDATE_CATEGORIES):
            residuals_by_cat[cat].extend(resid[:, idx].tolist())

    # e.g. 0.05 -> "q05", 0.95 -> "q95".
    quantile_keys = [f"q{int(q * 100):02d}" for q in quantiles]
    summary: Dict[str, Dict[str, float]] = {}
    for cat, values in residuals_by_cat.items():
        arr = np.asarray(values, dtype=float)
        if arr.size == 0:
            continue
        q_vals = np.quantile(arr, quantiles).tolist()
        entry = {key: float(val) for key, val in zip(quantile_keys, q_vals)}
        entry["mean"] = float(np.mean(arr))
        entry["std"] = float(np.std(arr))
        entry["n"] = int(arr.size)
        summary[cat] = entry

    return {
        "folds_used": len(folds),
        "quantiles": list(quantiles),
        "residuals": summary,
    }
449
+
450
+
451
def add_cv_selection_helpers(cv_summary: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the CV summary with model-selection helper columns.

    Adds ``worst_block_mae`` (max of the per-category cv_mae_* columns) and
    ``bias_abs`` (absolute cv_bias) when the source columns exist.
    """
    enriched = cv_summary.copy()
    per_block_cols = [
        col
        for col in enriched.columns
        if col.startswith("cv_mae_") and col != "cv_mae_mean"
    ]
    if per_block_cols:
        enriched["worst_block_mae"] = enriched[per_block_cols].max(axis=1)
    if "cv_bias" in enriched.columns:
        enriched["bias_abs"] = enriched["cv_bias"].abs()
    return enriched
459
+
460
+
461
def select_best_model(cv_summary: pd.DataFrame) -> Tuple[str, Dict[str, object]]:
    """Pick the best (model_name, params) row from the CV summary.

    Rows with |cv_bias| <= 0.02 are preferred when any exist; candidates are
    then ranked ascending on cv_mae_mean, worst_block_mae, bias_abs, cv_rmse,
    cv_smape (whichever columns are present). Raises RuntimeError on an
    empty summary.
    """
    if cv_summary.empty:
        raise RuntimeError("Aucun modèle évalué.")
    ranked = add_cv_selection_helpers(cv_summary)
    candidates = ranked
    if "bias_abs" in ranked.columns:
        low_bias = ranked[ranked["bias_abs"] <= 0.02]
        if not low_bias.empty:
            candidates = low_bias
    priority = ["cv_mae_mean", "worst_block_mae", "bias_abs", "cv_rmse", "cv_smape"]
    sort_cols = [col for col in priority if col in candidates.columns]
    winner = candidates.sort_values(sort_cols, na_position="last").iloc[0]
    return str(winner["model"]), dict(winner["params"])
474
+
475
+
476
+ def save_metrics(
477
+ metrics: Dict[str, Dict[str, Dict[str, float]]],
478
+ output_dir: Path,
479
+ cv_summary: pd.DataFrame | None = None,
480
+ ) -> None:
481
+ output_dir.mkdir(parents=True, exist_ok=True)
482
+ with (output_dir / "metrics.json").open("w", encoding="utf-8") as f:
483
+ json.dump(metrics, f, indent=2)
484
+
485
+ if cv_summary is not None and not cv_summary.empty:
486
+ cv_summary.to_csv(output_dir / "cv_summary.csv", index=False)
487
+ lines = ["# Métriques (parts, 0-1)\n"]
488
+ for model_name, splits in metrics.items():
489
+ lines.append(f"## {model_name}")
490
+ for split, vals in splits.items():
491
+ lines.append(
492
+ f"- {split} mae_mean: {vals.get('mae_mean', float('nan')):.4f}, "
493
+ f"rmse: {vals.get('rmse', float('nan')):.4f}, "
494
+ f"wape: {vals.get('wape', float('nan')):.4f}, "
495
+ f"winner_acc: {vals.get('winner_accuracy', float('nan')):.3f}"
496
+ )
497
+ lines.append("")
498
+ (output_dir / "metrics.md").write_text("\n".join(lines), encoding="utf-8")
499
+
500
+
501
def save_model_card(
    model_name: str,
    cfg: SplitConfig,
    feature_cols: List[str],
    metrics: Dict[str, Dict[str, Dict[str, float]]],
    output_dir: Path,
) -> None:
    """Write a short markdown model card (split, features, valid/test MAE)."""
    valid_mae = metrics[model_name]["valid"].get("mae_mean", float("nan"))
    test_mae = metrics[model_name]["test"].get("mae_mean", float("nan"))
    card = [
        "# Model card",
        f"- Modèle: {model_name}",
        f"- Split temporel: train<= {cfg.train_end_year}, valid<= {cfg.valid_end_year}, test>= {cfg.test_start_year}",
        f"- Features: {len(feature_cols)} colonnes numériques (lags, écarts national, swing, turnout)",
        "- Cibles: parts par bloc (7 catégories) renormalisées.",
        "- Métriques principales (MAE moyen, jeux valid/test):",
        f"  - Valid: {valid_mae:.4f}",
        f"  - Test: {test_mae:.4f}",
    ]
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "model_card.md").write_text("\n".join(card), encoding="utf-8")
520
+
521
+
522
def plot_mae_per_category(model_name: str, mae_scores: Dict[str, float], output_dir: Path) -> None:
    """Save a bar chart of per-category MAE under ``output_dir``.

    Silently skips (with a warning) when matplotlib is unavailable or any
    per-category score is missing from ``mae_scores``.
    """
    try:
        import matplotlib.pyplot as plt
    except Exception:
        LOGGER.warning("Matplotlib indisponible, skip figure.")
        return
    expected_keys = [f"mae_{c}" for c in CANDIDATE_CATEGORIES]
    if any(key not in mae_scores for key in expected_keys):
        LOGGER.warning("Scores MAE par categorie indisponibles, skip figure.")
        return
    values = [mae_scores[key] for key in expected_keys]
    plt.figure(figsize=(8, 4))
    plt.bar(CANDIDATE_CATEGORIES, values, color="#2c7fb8")
    plt.xticks(rotation=30, ha="right")
    plt.ylabel("MAE (part)")
    plt.title(f"MAE par catégorie - {model_name}")
    output_dir.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(output_dir / "mae_per_category.png")
    plt.close()
542
+
543
+
544
def main() -> None:
    """CLI entry point: load the panel, tune via temporal CV, evaluate the
    best model on the temporal split, and persist model + reports."""
    # --- CLI arguments -------------------------------------------------
    parser = argparse.ArgumentParser(description="Entraînement et évaluation temporelle multi-blocs.")
    parser.add_argument("--panel", type=Path, default=Path("data/processed/panel.parquet"), help="Dataset panel parquet.")
    parser.add_argument("--models-dir", type=Path, default=Path("models"), help="Répertoire de sauvegarde des modèles.")
    parser.add_argument("--reports-dir", type=Path, default=Path("reports"), help="Répertoire de sortie des rapports.")
    parser.add_argument("--train-end-year", type=int, default=2019, help="Dernière année incluse dans le train.")
    parser.add_argument("--valid-end-year", type=int, default=2021, help="Dernière année incluse dans la validation.")
    parser.add_argument("--test-start-year", type=int, default=2022, help="Première année du test (inclusif).")
    parser.add_argument("--cv-splits", type=int, default=4, help="Nombre de folds temporels pour la CV par scrutin.")
    parser.add_argument("--no-tune", action="store_true", help="Désactiver la recherche d'hyperparamètres.")
    parser.add_argument("--max-trials", type=int, default=0, help="Limiter le nombre d'essais par modèle (0=all).")
    parser.add_argument(
        "--models",
        nargs="+",
        default=list(MODEL_GRIDS.keys()),
        help="Liste des modèles à tester (ridge, hist_gradient_boosting, lightgbm, xgboost, two_stage_hgb, catboost).",
    )
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    cfg = SplitConfig(train_end_year=args.train_end_year, valid_end_year=args.valid_end_year, test_start_year=args.test_start_year)

    # --- Data loading & feature selection ------------------------------
    panel = load_panel(args.panel)
    # Rows without complete targets cannot be scored -> drop them up front.
    panel = panel.dropna(subset=TARGET_COLS)
    feature_cols = get_feature_columns(panel)
    all_na = [c for c in feature_cols if panel[c].isna().all()]
    if all_na:
        LOGGER.warning("Features supprimées car entièrement NA: %s", all_na)
        feature_cols = [c for c in feature_cols if c not in all_na]

    train_df, valid_df, test_df = temporal_split(panel, cfg)
    # train+valid (everything before the test years) is used for CV tuning
    # and for the final refit; reset_index keeps positions aligned with the
    # fold indices consumed via .iloc.
    train_valid_df = panel[panel["election_year"] < cfg.test_start_year].copy().reset_index(drop=True)

    models_to_run = [m for m in args.models if m in MODEL_GRIDS]
    if not models_to_run:
        raise RuntimeError("Aucun modèle demandé n'est reconnu.")

    # --- Hyper-parameter search via event-wise temporal CV -------------
    cv_rows: List[Dict[str, object]] = []
    if not args.no_tune:
        rng = np.random.default_rng(42)
        for model_name in models_to_run:
            grid = MODEL_GRIDS[model_name]
            # --max-trials subsamples the grid reproducibly (seeded RNG).
            if args.max_trials and len(grid) > args.max_trials:
                indices = rng.choice(len(grid), size=args.max_trials, replace=False)
                grid = [grid[i] for i in indices]
            for params in grid:
                model = make_model(model_name, feature_cols, params)
                if model is None:
                    # Backend not installed -> skip this grid entry.
                    continue
                cv_metrics = evaluate_cv(model, train_valid_df, feature_cols, args.cv_splits, TARGET_COLS)
                row = {"model": model_name, "params": params, **cv_metrics}
                cv_rows.append(row)

    cv_summary = pd.DataFrame(cv_rows)
    if not cv_summary.empty:
        cv_summary = cv_summary.dropna(subset=["cv_mae_mean"])
        cv_summary = add_cv_selection_helpers(cv_summary)
    if not cv_summary.empty:
        best_model_name, best_params = select_best_model(cv_summary)
        LOGGER.info("Meilleur modèle CV: %s %s", best_model_name, best_params)
    else:
        # No CV results (e.g. --no-tune): fall back to the first requested
        # model with its first grid entry.
        best_model_name = models_to_run[0]
        best_params = MODEL_GRIDS[best_model_name][0]
        LOGGER.warning("Pas de CV disponible, fallback sur %s %s", best_model_name, best_params)

    # --- Residual-based uncertainty intervals (out-of-fold) ------------
    residual_payload = {}
    model_for_intervals = make_model(best_model_name, feature_cols, best_params)
    if model_for_intervals is not None and not train_valid_df.empty:
        residual_payload = compute_cv_residual_intervals(
            model_for_intervals,
            train_valid_df,
            feature_cols,
            TARGET_COLS,
            args.cv_splits,
        )
    if residual_payload.get("residuals"):
        args.reports_dir.mkdir(parents=True, exist_ok=True)
        (args.reports_dir / "residual_intervals.json").write_text(
            json.dumps(
                {
                    "model": best_model_name,
                    **residual_payload,
                },
                indent=2,
            ),
            encoding="utf-8",
        )

    # --- Final evaluation on the temporal split ------------------------
    X_train, y_train = train_df[feature_cols], train_df[TARGET_COLS].values
    X_valid, y_valid = valid_df[feature_cols], valid_df[TARGET_COLS].values
    X_test, y_test = test_df[feature_cols], test_df[TARGET_COLS].values
    X_train_valid, y_train_valid = train_valid_df[feature_cols], train_valid_df[TARGET_COLS].values

    eval_results: Dict[str, Dict[str, Dict[str, float]]] = {}
    # Fit on train only so valid/test remain honest hold-outs.
    best_model_eval = make_model(best_model_name, feature_cols, best_params)
    if best_model_eval is None:
        raise RuntimeError(f"Modèle indisponible: {best_model_name}")
    best_model_eval.fit(X_train, y_train)
    eval_results[best_model_name] = {
        "train": evaluate(best_model_eval, X_train, y_train),
        "valid": evaluate(best_model_eval, X_valid, y_valid),
        "test": evaluate(best_model_eval, X_test, y_test),
        "train_valid": evaluate(best_model_eval, X_train_valid, y_train_valid),
    }

    # The shipped model is refitted on train+valid for maximal data usage.
    best_model_final = make_model(best_model_name, feature_cols, best_params)
    if best_model_final is None:
        raise RuntimeError(f"Modèle indisponible: {best_model_name}")
    best_model_final.fit(X_train_valid, y_train_valid)

    # --- Artifact persistence ------------------------------------------
    args.models_dir.mkdir(parents=True, exist_ok=True)
    joblib.dump(best_model_final, args.models_dir / f"{best_model_name}.joblib")
    LOGGER.info("Modèle sauvegardé dans %s", args.models_dir / f"{best_model_name}.joblib")
    (args.models_dir / "feature_columns.json").write_text(json.dumps(feature_cols, indent=2), encoding="utf-8")
    (args.models_dir / "best_model.json").write_text(json.dumps({"name": best_model_name}, indent=2), encoding="utf-8")

    save_metrics(eval_results, args.reports_dir, cv_summary=cv_summary)
    plot_mae_per_category(best_model_name, eval_results[best_model_name]["test"], args.reports_dir / "figures")
    save_model_card(best_model_name, cfg, feature_cols, eval_results, args.models_dir)
664
+
665
# Script entry point: run the full train/evaluate/export pipeline.
if __name__ == "__main__":
    main()
src/pipeline.py ADDED
@@ -0,0 +1,435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Iterable, Mapping, Optional
5
+
6
+ import pandas as pd
7
+ import re
8
+ import yaml
9
+
10
+ from .constants import CANDIDATE_CATEGORIES
11
+
12
+
13
def normalize_bloc(bloc: str | None) -> str:
    """Map a free-form bloc label onto one of the canonical project categories.

    Labels are lowercased and normalised to snake_case before a synonym
    lookup; anything unknown (or None) falls back to "centre".
    """
    if bloc is None:
        return "centre"
    label = str(bloc).strip().lower().replace(" ", "_").replace("-", "_")
    # NOTE: "doite_dure" appears to be a deliberate mapping for a known typo
    # of "droite_dure" in upstream data — confirm before removing.
    synonyms = {
        "droite_moderee": "droite_modere",
        "gauche_moderee": "gauche_modere",
        "doite_dure": "droite_dure",
        "gauche": "gauche_modere",
        "droite": "droite_modere",
        "divers": "centre",
        "divers_droite": "droite_modere",
        "divers_gauche": "gauche_modere",
        "divers_centre": "centre",
        "extreme_gauche": "extreme_gauche",
        "extreme_droite": "extreme_droite",
    }
    label = synonyms.get(label, label)
    return label if label in CANDIDATE_CATEGORIES else "centre"
37
+
38
+
39
# Default communes configuration: <repo root>/config/communes.yaml
# (this module lives one level below the repository root).
DEFAULT_COMMUNES_PATH = (Path(__file__).resolve().parents[1] / "config" / "communes.yaml")
40
+
41
+
42
+ def _normalize_insee_code(value: str | int | None) -> str:
43
+ if value is None:
44
+ return ""
45
+ cleaned = (
46
+ str(value)
47
+ .strip()
48
+ .replace(".0", "")
49
+ )
50
+ cleaned = re.sub(r"\D", "", cleaned)
51
+ if not cleaned:
52
+ return ""
53
+ if len(cleaned) >= 5:
54
+ return cleaned[:5]
55
+ return cleaned.zfill(5)
56
+
57
+
58
def load_target_communes(path: Path = DEFAULT_COMMUNES_PATH) -> dict[str, str]:
    """Load the {insee_code: name} mapping of target communes from YAML.

    Accepts either a mapping {code: name} (optionally nested under a
    top-level "communes" key) or a list whose entries are bare codes or
    dicts with code_insee/code/insee and nom/name keys. Codes are normalised
    to 5-digit strings; entries with no usable code are skipped.

    Raises FileNotFoundError when the file is missing and ValueError when
    the YAML structure is neither a list nor a mapping.
    """
    if not path.exists():
        raise FileNotFoundError(f"Fichier communes introuvable: {path}")
    # Explicit UTF-8: read_text() without an encoding uses the platform
    # default, which is not reliable for accented commune names; the rest of
    # the project reads/writes text with encoding="utf-8".
    raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    entries = raw.get("communes", raw) if isinstance(raw, dict) else raw
    communes: dict[str, str] = {}

    if isinstance(entries, dict):
        for code, name in entries.items():
            norm = _normalize_insee_code(code)
            if norm:
                communes[norm] = str(name) if name is not None else ""
        return communes

    if not isinstance(entries, list):
        raise ValueError("Format YAML invalide: attendu une liste ou un mapping sous 'communes'.")

    for entry in entries:
        if isinstance(entry, str):
            norm = _normalize_insee_code(entry)
            if norm:
                communes[norm] = ""
            continue
        if isinstance(entry, dict):
            code = entry.get("code_insee") or entry.get("code") or entry.get("insee")
            name = entry.get("nom") or entry.get("name") or ""
            norm = _normalize_insee_code(code)
            if norm:
                communes[norm] = str(name) if name is not None else ""
    return communes
89
+
90
+
91
def load_elections_long(path: Path) -> pd.DataFrame:
    """Load the harmonised long-format election dataset.

    ``.parquet`` files are read natively; anything else is treated as a
    ';'-separated CSV. ``date_scrutin`` is parsed to datetime, count columns
    are coerced to numeric (invalid -> NaN) and missing ``voix`` become 0.
    """
    if path.suffix == ".parquet":
        data = pd.read_parquet(path)
    else:
        data = pd.read_csv(path, sep=";")
    data["date_scrutin"] = pd.to_datetime(data["date_scrutin"])
    for column in ("exprimes", "inscrits", "votants", "voix", "blancs", "nuls"):
        if column in data.columns:
            data[column] = pd.to_numeric(data[column], errors="coerce")
    data["voix"] = data["voix"].fillna(0)
    return data
106
+
107
+
108
def _mapping_from_yaml(path: Path) -> pd.DataFrame:
    """Build the candidature -> bloc mapping DataFrame from a YAML file.

    The YAML may provide rows inline under ``mapping``, point at a base CSV
    via ``base_mapping`` (relative paths resolved against the YAML file's
    directory), or neither (empty frame). ``overrides`` entries then update
    matching ``code_candidature`` rows column by column, or are appended
    when no match exists. An override's ``blocs`` list is expanded into the
    bloc_1..bloc_3 columns.
    """
    # Local import guard: redundant with the module-level `import yaml` but
    # turns a missing PyYAML into an explicit RuntimeError here.
    try:
        import yaml
    except Exception as exc:
        raise RuntimeError("PyYAML est requis pour charger un mapping YAML.") from exc
    raw = yaml.safe_load(path.read_text()) or {}
    if not isinstance(raw, dict):
        raise ValueError("Mapping YAML invalide: attendu un dictionnaire.")

    base_mapping = raw.get("base_mapping")
    mapping_entries = raw.get("mapping")
    overrides = raw.get("overrides", [])

    # Priority: inline `mapping` rows > `base_mapping` CSV > empty skeleton.
    mapping = pd.DataFrame()
    if mapping_entries:
        mapping = pd.DataFrame(mapping_entries)
    elif base_mapping:
        base_path = Path(base_mapping)
        if not base_path.is_absolute():
            # Relative base paths are resolved next to the YAML file.
            base_path = path.parent / base_path
        mapping = pd.read_csv(base_path, sep=";")
    else:
        mapping = pd.DataFrame(columns=["code_candidature", "nom_candidature", "bloc_1", "bloc_2", "bloc_3"])

    if overrides:
        override_df = pd.DataFrame(overrides)
        if not override_df.empty:
            # A `blocs: [a, b, c]` list is spread over bloc_1..bloc_3
            # (missing positions become None).
            if "blocs" in override_df.columns:
                blocs = override_df["blocs"].apply(lambda v: v if isinstance(v, list) else [])
                override_df["bloc_1"] = blocs.apply(lambda v: v[0] if len(v) > 0 else None)
                override_df["bloc_2"] = blocs.apply(lambda v: v[1] if len(v) > 1 else None)
                override_df["bloc_3"] = blocs.apply(lambda v: v[2] if len(v) > 2 else None)
                override_df = override_df.drop(columns=["blocs"])
            # Accept the short aliases `code` / `nom` in override entries.
            if "code_candidature" not in override_df.columns and "code" in override_df.columns:
                override_df = override_df.rename(columns={"code": "code_candidature"})
            if "nom_candidature" not in override_df.columns and "nom" in override_df.columns:
                override_df = override_df.rename(columns={"nom": "nom_candidature"})

            # String-typed keys on both sides so the equality match is exact.
            mapping = mapping.copy()
            if "code_candidature" in mapping.columns:
                mapping["code_candidature"] = mapping["code_candidature"].astype(str)
            if "code_candidature" in override_df.columns:
                override_df["code_candidature"] = override_df["code_candidature"].astype(str)

            for _, row in override_df.iterrows():
                code = row.get("code_candidature")
                if code is None:
                    continue
                if "code_candidature" in mapping.columns:
                    mask = mapping["code_candidature"] == code
                else:
                    mask = pd.Series([False] * len(mapping))
                if mask.any():
                    # Update only the non-NaN override columns of the match.
                    for col in ["nom_candidature", "bloc_1", "bloc_2", "bloc_3"]:
                        if col in row and pd.notna(row[col]):
                            mapping.loc[mask, col] = row[col]
                else:
                    # Unknown code: append the override as a new row.
                    mapping = pd.concat([mapping, pd.DataFrame([row])], ignore_index=True)
    return mapping
167
+
168
+
169
def load_bloc_mapping(path: Path) -> pd.DataFrame:
    """Load the candidature -> bloc mapping from YAML or ';'-separated CSV.

    Bloc label columns are normalised once here so downstream code only
    ever sees canonical category names.
    """
    if path.suffix in {".yml", ".yaml"}:
        mapping = _mapping_from_yaml(path)
    else:
        mapping = pd.read_csv(path, sep=";")
    for bloc_col in ("bloc_1", "bloc_2", "bloc_3"):
        if bloc_col in mapping.columns:
            mapping[bloc_col] = mapping[bloc_col].apply(normalize_bloc)
    return mapping
179
+
180
+
181
def expand_voix_by_bloc(elections_long: pd.DataFrame, mapping: pd.DataFrame) -> pd.DataFrame:
    """
    Distribute voix of each candidature across its mapped blocs.

    Each candidature row is joined to its bloc mapping and its ``voix`` are
    split evenly between its 1-3 blocs; candidatures with no mapped bloc are
    counted in the ``centre`` bloc. Returns one row per
    (bureau, scrutin, bloc) with derived participation ratios.

    Fix: the original crashed with a KeyError on empty input because
    ``DataFrame.from_records([])`` has no columns; the columns are now
    declared explicitly so an empty frame keeps the expected schema.
    """
    df = elections_long.merge(mapping, on="code_candidature", how="left")
    carried = ["code_bv", "nom_bv", "date_scrutin", "annee", "type_scrutin", "tour"]
    numeric = ["exprimes", "inscrits", "votants", "blancs", "nuls"]
    records: list[dict] = []
    for _, row in df.iterrows():
        blocs = [row.get("bloc_1"), row.get("bloc_2"), row.get("bloc_3")]
        blocs = [normalize_bloc(b) for b in blocs if isinstance(b, str) and b]
        if not blocs:
            # Unmapped candidatures default to the centre bloc.
            blocs = ["centre"]
        voix = row.get("voix", 0) or 0
        repartition = voix / len(blocs)  # even split across blocs
        for bloc in blocs:
            record = {col: row.get(col) for col in carried}
            record["bloc"] = bloc
            record["voix_bloc"] = repartition
            record.update({col: row.get(col) for col in numeric})
            records.append(record)
    columns = carried + ["bloc", "voix_bloc"] + numeric
    # Explicit columns keep the frame well-formed even when records is empty.
    result = pd.DataFrame.from_records(records, columns=columns)
    result["date_scrutin"] = pd.to_datetime(result["date_scrutin"])
    for col in ["voix_bloc"] + numeric:
        result[col] = pd.to_numeric(result[col], errors="coerce")
    result["part_bloc"] = result["voix_bloc"] / result["exprimes"]
    # Avoid division by zero: bureaux with 0 inscrits yield <NA> ratios.
    base_inscrits = result["inscrits"].replace(0, pd.NA)
    result["taux_participation_bv"] = result["votants"] / base_inscrits
    result["taux_blancs_bv"] = result["blancs"] / base_inscrits
    result["taux_nuls_bv"] = result["nuls"] / base_inscrits
    return result
223
+
224
+
225
def compute_national_reference(elections_blocs: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate national part/participation per date & bloc if no external national file is provided.
    """
    agg_cols = ["voix_bloc", "exprimes", "votants", "inscrits"]
    nat = elections_blocs.groupby(["date_scrutin", "bloc"], as_index=False)[agg_cols].sum()
    nat = nat.rename(columns={col: f"{col}_nat" for col in agg_cols})
    # Zero denominators become <NA> instead of raising / producing inf.
    exprimes = nat["exprimes_nat"].replace(0, pd.NA)
    inscrits = nat["inscrits_nat"].replace(0, pd.NA)
    nat["part_bloc_national"] = nat["voix_bloc_nat"] / exprimes
    nat["taux_participation_national"] = nat["votants_nat"] / inscrits
    return nat[["date_scrutin", "bloc", "part_bloc_national", "taux_participation_national"]]
237
+
238
+
239
def attach_national_results(
    elections_blocs: pd.DataFrame,
    resultats_nationaux: Optional[pd.DataFrame] = None,
) -> pd.DataFrame:
    """
    Merge national reference scores if provided; otherwise, compute them from the full dataset.
    """
    if resultats_nationaux is None:
        reference = compute_national_reference(elections_blocs)
    else:
        reference = resultats_nationaux.copy()
        reference["date_scrutin"] = pd.to_datetime(reference["date_scrutin"])

    merged = elections_blocs.merge(reference, on=["date_scrutin", "bloc"], how="left")
    # Local-vs-national gaps used as modelling features downstream.
    merged["ecart_bloc_vs_national"] = merged["part_bloc"] - merged["part_bloc_national"]
    merged["ecart_participation_vs_nat"] = (
        merged["taux_participation_bv"] - merged["taux_participation_national"]
    )
    return merged
260
+
261
+
262
def compute_population_growth(elections_blocs: pd.DataFrame, base_year: int = 2014) -> pd.DataFrame:
    """Attach per-bureau inscrit growth relative to *base_year*.

    The mean inscrits per (bureau, year) is compared to the same bureau's
    mean in *base_year*; bureaux absent in the base year get <NA>.
    """
    mean_inscrits = elections_blocs.groupby(["code_bv", "annee"], as_index=False)["inscrits"].mean()
    baseline = mean_inscrits.loc[mean_inscrits["annee"] == base_year, ["code_bv", "inscrits"]]
    baseline = baseline.rename(columns={"inscrits": "inscrits_base"})
    growth = mean_inscrits.merge(baseline, on="code_bv", how="left")
    delta = growth["inscrits"] - growth["inscrits_base"]
    growth["croissance_inscrits_depuis_base"] = delta / growth["inscrits_base"]

    return elections_blocs.merge(
        growth[["code_bv", "annee", "croissance_inscrits_depuis_base"]],
        on=["code_bv", "annee"],
        how="left",
    )
279
+
280
+
281
def add_lag_features(elections_blocs: pd.DataFrame) -> pd.DataFrame:
    """Add previous-election lag features per (bureau, bloc) and a centred year."""
    out = elections_blocs.sort_values(["code_bv", "bloc", "date_scrutin"])
    grouped = out.groupby(["code_bv", "bloc"])
    # One-step lags within each (bureau, bloc) time series.
    for col in ("part_bloc", "ecart_bloc_vs_national", "taux_participation_bv"):
        out[f"{col}_lag1"] = grouped[col].shift(1)
    out["annee_centre"] = out["annee"] - out["annee"].median()
    return out
292
+
293
+
294
def filter_target_communes(elections_blocs: pd.DataFrame, target_communes: Mapping[str, str]) -> pd.DataFrame:
    """
    Keep only bureaux belonging to the target communes list.
    """
    out = elections_blocs.copy()
    if "code_commune" in out.columns:
        codes = out["code_commune"].astype(str)
    else:
        # code_bv is "<commune>-<bureau>": keep the prefix, digits only,
        # normalised to exactly 5 characters.
        codes = out["code_bv"].astype(str).str.split("-").str[0]
        codes = codes.str.replace(r"\D", "", regex=True).str.zfill(5).str.slice(0, 5)
    out["code_commune"] = codes
    out["nom_commune"] = codes.map(target_communes)
    keep = out["code_commune"].isin(target_communes.keys())
    return out[keep]
307
+
308
+
309
def compute_commune_event_stats(
    elections_long: pd.DataFrame,
    target_communes: Mapping[str, str],
) -> pd.DataFrame:
    """Aggregate turnout statistics per commune and election event.

    Parameters
    ----------
    elections_long:
        Long-format results, one row per candidature and bureau.
    target_communes:
        Mapping of 5-digit commune code -> commune display name.

    Returns
    -------
    One row per (commune, type_scrutin, annee, tour, date_scrutin) with
    summed counts and derived turnout / blank / null ratios.
    """
    df = elections_long.copy()
    # Derive the commune code: either the explicit column, or the prefix of
    # code_bv ("<commune>-<bureau>") normalised to exactly 5 digits.
    if "code_commune" in df.columns:
        code_series = df["code_commune"].astype(str)
    else:
        code_series = df["code_bv"].astype(str).str.split("-").str[0]
        code_series = code_series.str.replace(r"\D", "", regex=True).str.zfill(5).str.slice(0, 5)
    df["code_commune"] = code_series
    df = df[df["code_commune"].isin(target_communes.keys())]
    df["nom_commune"] = df["code_commune"].map(target_communes)
    if "date_scrutin" in df.columns:
        df["date_scrutin"] = pd.to_datetime(df["date_scrutin"], errors="coerce")
    # Coerce count columns to numeric; missing ones are created as <NA>.
    for col in ["exprimes", "inscrits", "votants", "blancs", "nuls"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        else:
            df[col] = pd.NA

    # Counts are repeated on every candidature row of a bureau, so take the
    # max per bureau/event first, then sum bureaux within the commune.
    bv_cols = [c for c in ["code_commune", "code_bv", "type_scrutin", "annee", "tour", "date_scrutin"] if c in df.columns]
    bv_event = (
        df.groupby(bv_cols, as_index=False)
        .agg(
            exprimes=("exprimes", "max"),
            inscrits=("inscrits", "max"),
            votants=("votants", "max"),
            blancs=("blancs", "max"),
            nuls=("nuls", "max"),
        )
    )
    commune_cols = [c for c in ["code_commune", "type_scrutin", "annee", "tour", "date_scrutin"] if c in bv_event.columns]
    commune = (
        bv_event.groupby(commune_cols, as_index=False)
        .agg(
            exprimes=("exprimes", "sum"),
            inscrits=("inscrits", "sum"),
            votants=("votants", "sum"),
            blancs=("blancs", "sum"),
            nuls=("nuls", "sum"),
        )
    )
    # Zero inscrits yields <NA> ratios instead of division errors.
    base_inscrits = commune["inscrits"].replace(0, pd.NA)
    commune["turnout_pct"] = commune["votants"] / base_inscrits
    commune["blancs_pct"] = commune["blancs"] / base_inscrits
    commune["nuls_pct"] = commune["nuls"] / base_inscrits
    commune["nom_commune"] = commune["code_commune"].map(target_communes)
    return commune
358
+
359
+
360
def build_elections_blocs(
    elections_long_path: Path,
    mapping_path: Path,
    *,
    national_results_path: Optional[Path] = None,
    base_year: int = 2014,
    target_communes_path: Path = DEFAULT_COMMUNES_PATH,
) -> pd.DataFrame:
    """Build the per-bureau, per-bloc analytical table end to end."""
    blocs = expand_voix_by_bloc(
        load_elections_long(elections_long_path),
        load_bloc_mapping(mapping_path),
    )

    national_df = None
    if national_results_path and national_results_path.exists():
        if national_results_path.suffix == ".parquet":
            national_df = pd.read_parquet(national_results_path)
        else:
            national_df = pd.read_csv(national_results_path, sep=";")
    # The national reference is always attached: computed from the data
    # itself when no external file is supplied.
    blocs = attach_national_results(blocs, national_df)

    # Restrict to the target communes declared in the YAML configuration.
    blocs = filter_target_communes(blocs, load_target_communes(target_communes_path))

    blocs = compute_population_growth(blocs, base_year=base_year)
    return add_lag_features(blocs)
388
+
389
+
390
def save_processed(df: pd.DataFrame, output_dir: Path) -> None:
    """Persist the blocs table as parquet and semicolon-separated CSV."""
    output_dir.mkdir(parents=True, exist_ok=True)
    df.to_parquet(output_dir / "elections_blocs.parquet", index=False)
    df.to_csv(output_dir / "elections_blocs.csv", sep=";", index=False)
396
+
397
+
398
def save_commune_event_stats(df: pd.DataFrame, output_dir: Path) -> None:
    """Persist the commune event stats as parquet and semicolon-separated CSV."""
    output_dir.mkdir(parents=True, exist_ok=True)
    df.to_parquet(output_dir / "commune_event_stats.parquet", index=False)
    df.to_csv(output_dir / "commune_event_stats.csv", sep=";", index=False)
404
+
405
+
406
def run_full_pipeline(
    elections_long_path: Path = Path("data/interim/elections_long.parquet"),
    mapping_path: Path = Path("config/nuances.yaml"),
    output_dir: Path = Path("data/processed"),
    national_results_path: Optional[Path] = None,
    target_communes_path: Path = DEFAULT_COMMUNES_PATH,
    base_year: int = 2014,
) -> pd.DataFrame:
    """Run the full processing pipeline and persist all outputs.

    Builds the per-bureau/per-bloc table, writes it to *output_dir*, then
    derives and writes the per-commune event statistics.

    Parameters
    ----------
    base_year:
        Reference year for the inscrit-growth feature. New trailing
        parameter; defaults to the value that was previously hard-coded,
        so existing callers are unaffected.

    Returns the blocs DataFrame.
    """
    df = build_elections_blocs(
        elections_long_path=elections_long_path,
        mapping_path=mapping_path,
        national_results_path=national_results_path,
        base_year=base_year,
        target_communes_path=target_communes_path,
    )
    save_processed(df, output_dir)
    # Commune-level stats are computed from the raw long table (not the
    # bloc-expanded one) so counts are not duplicated per bloc.
    elections_long = load_elections_long(elections_long_path)
    target_communes = load_target_communes(target_communes_path)
    commune_stats = compute_commune_event_stats(elections_long, target_communes)
    save_commune_event_stats(commune_stats, output_dir)
    return df
425
+
426
+
427
# Public API of this module (names exported by ``from ... import *``).
__all__ = [
    "build_elections_blocs",
    "run_full_pipeline",
    "save_processed",
    "normalize_bloc",
    "load_target_communes",
    "compute_commune_event_stats",
    "save_commune_event_stats",
]
src/prediction.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Optional
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ import warnings
9
+
10
+ from .constants import CANDIDATE_CATEGORIES
11
+ from .pipeline import normalize_bloc
12
+
13
# numpy moved RankWarning between releases: ``np.RankWarning`` (< 2.0) and
# ``np.exceptions.RankWarning`` (>= 1.25). Try both so the warning filters
# below still suppress real RankWarnings on modern numpy; only fall back to
# a local stub when neither location exists.
try:
    from numpy import RankWarning as NP_RANK_WARNING  # type: ignore[attr-defined]
except Exception:
    try:
        from numpy.exceptions import RankWarning as NP_RANK_WARNING  # type: ignore[attr-defined]
    except Exception:
        class NP_RANK_WARNING(UserWarning):
            pass
18
+
19
+
20
@dataclass
class PredictionResult:
    """Projected outcome for a single political bloc."""
    # Bloc identifier (e.g. "centre", "gauche_modere").
    category: str
    # Projected share of expressed votes, in [0, 1].
    predicted_share: float
    # Projected absolute number of votes for this bloc.
    predicted_count: int
25
+
26
+
27
@dataclass
class PredictionSummary:
    """Full projected outcome: per-bloc results plus aggregate counts."""
    # Per-bloc projections; empty when no prediction could be made.
    bloc_predictions: list[PredictionResult]
    # Aggregate projected counts; all None when prediction failed.
    inscrits: Optional[int]
    votants: Optional[int]
    blancs: Optional[int]
    nuls: Optional[int]
    abstention: Optional[int]
    exprimes: Optional[int]
36
+
37
+
38
# Left-to-right political ordering used for display and for building
# prediction tables.
DISPLAY_BLOC_ORDER = [
    "extreme_gauche",
    "gauche_dure",
    "gauche_modere",
    "centre",
    "droite_modere",
    "droite_dure",
    "extreme_droite",
]
# Non-bloc outcome categories appended after the blocs.
EXTRA_CATEGORIES = ["blancs", "nuls", "abstention"]
48
+
49
+
50
+ def _clip01(value: float) -> float:
51
+ return float(min(1.0, max(0.0, value)))
52
+
53
+
54
+ def _last_share(df: pd.DataFrame, bloc: str, *, election: Optional[str] = None, year: Optional[int] = None) -> Optional[float]:
55
+ subset = df[df["bloc"] == bloc]
56
+ if election:
57
+ subset = subset[subset["type_scrutin"] == election]
58
+ if year is not None:
59
+ subset = subset[subset["annee"] == year]
60
+ if subset.empty:
61
+ return None
62
+ valid = subset.sort_values("date_scrutin")["part_bloc"].dropna()
63
+ if valid.empty:
64
+ return None
65
+ return valid.iloc[-1] # type: ignore[index]
66
+
67
+
68
+ def _last_value(series: pd.Series) -> Optional[float]:
69
+ series = pd.to_numeric(series, errors="coerce").dropna()
70
+ if series.empty:
71
+ return None
72
+ return float(series.iloc[-1])
73
+
74
+
75
def _project_share(series: pd.Series, years: pd.Series, target_year: int) -> Optional[float]:
    """Linearly extrapolate a vote share to *target_year*, clipped to [0, 1].

    Falls back to the last observation when fewer than two distinct years
    are available or the fit fails; None when nothing is numeric.
    """
    data = pd.DataFrame(
        {"value": pd.to_numeric(series, errors="coerce"), "year": pd.to_numeric(years, errors="coerce")}
    ).dropna()
    if data.empty:
        return None
    projected = data["value"].iloc[-1]
    if len(data["year"].unique()) >= 2 and len(data) >= 2:
        # Tiny samples can make the fit ill-conditioned: silence the rank
        # warning and fall back to the last observation on failure.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=NP_RANK_WARNING)
            try:
                slope, intercept = np.polyfit(data["year"], data["value"], 1)
                projected = slope * target_year + intercept
            except Exception:
                projected = data["value"].iloc[-1]
    return _clip01(float(projected))
92
+
93
+
94
def _project_rate(
    series: pd.Series,
    years: pd.Series,
    target_year: int,
    *,
    min_points_trend: int = 3,
    clamp_to_observed: bool = True,
) -> Optional[float]:
    """Project a participation-style rate to *target_year*.

    Fits a linear trend when at least ``min_points_trend`` distinct years
    (and rows) are available; otherwise reuses the last observed value.
    The projection is optionally clamped to the observed [min, max] range,
    then always clipped to [0, 1]. Returns None when no numeric
    observations exist.
    """
    df = pd.DataFrame(
        {"value": pd.to_numeric(series, errors="coerce"), "year": pd.to_numeric(years, errors="coerce")}
    ).dropna()
    if df.empty:
        return None
    values = df["value"].to_numpy()
    years_arr = df["year"].to_numpy()
    if len(set(years_arr)) >= min_points_trend and len(df) >= min_points_trend:
        # Suppress numpy's rank warning for poorly conditioned small fits.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=NP_RANK_WARNING)
            try:
                slope, intercept = np.polyfit(years_arr, values, 1)
                projected = slope * target_year + intercept
            except Exception:
                # Degenerate fit: fall back to the most recent observation.
                projected = values[-1]
    else:
        projected = values[-1]
    if clamp_to_observed and len(values):
        # Keep extrapolations within the historically observed range.
        projected = min(max(projected, float(np.nanmin(values))), float(np.nanmax(values)))
    return _clip01(float(projected))
122
+
123
+
124
+ def _allocate_counts(shares: list[float], total: int) -> list[int]:
125
+ if total <= 0 or not shares:
126
+ return [0 for _ in shares]
127
+ arr = np.clip(np.asarray(shares, dtype=float), 0, None)
128
+ if arr.sum() == 0:
129
+ return [0 for _ in shares]
130
+ arr = arr / arr.sum()
131
+ raw = arr * total
132
+ floors = np.floor(raw)
133
+ remainder = int(total - floors.sum())
134
+ if remainder > 0:
135
+ order = np.argsort(-(raw - floors))
136
+ for idx in order[:remainder]:
137
+ floors[idx] += 1
138
+ return floors.astype(int).tolist()
139
+
140
+
141
def compute_predictions(
    history: pd.DataFrame,
    *,
    target_election: str = "municipales",
    target_year: int = 2026,
    inscrits_override: Optional[float] = None,
) -> PredictionSummary:
    """Project bloc shares and turnout for a future election.

    Parameters
    ----------
    history:
        Bloc-level history with at least ``bloc``, ``voix_bloc``,
        ``code_bv`` and ``date_scrutin`` columns; other count columns are
        created as NaN when absent.
    target_election:
        Scrutin type to prefer when a matching history exists.
    target_year:
        Year to extrapolate trends to.
    inscrits_override:
        Optional registered-voter count to use instead of the last
        observed value.

    Returns
    -------
    A PredictionSummary; all-None fields when *history* is empty or no
    inscrit count can be determined.
    """
    if history.empty:
        return PredictionSummary([], None, None, None, None, None, None)

    df = history.copy()
    target_election = str(target_election).strip().lower()
    df["bloc"] = df["bloc"].apply(normalize_bloc)
    if "type_scrutin" in df.columns:
        df["type_scrutin"] = df["type_scrutin"].astype(str).str.strip().str.lower()
    # Coerce numeric and infer exprimes when missing from the sum of voix_bloc
    for col in ["voix_bloc", "exprimes", "inscrits", "votants", "blancs", "nuls"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
    for col in ["inscrits", "votants", "blancs", "nuls"]:
        if col not in df.columns:
            df[col] = np.nan
    if "exprimes" in df.columns:
        sum_voix = df.groupby(["code_bv", "date_scrutin"])["voix_bloc"].transform("sum")
        df["exprimes"] = df["exprimes"].fillna(sum_voix)
        df.loc[df["exprimes"] == 0, "exprimes"] = sum_voix
    if "part_bloc" not in df.columns or df["part_bloc"].isna().all():
        df["part_bloc"] = df["voix_bloc"] / df["exprimes"]
    df["part_bloc"] = pd.to_numeric(df["part_bloc"], errors="coerce").clip(upper=1)
    df = df.dropna(subset=["bloc"])

    # --- Per-bloc share projection -------------------------------------
    bloc_order = [b for b in DISPLAY_BLOC_ORDER if b in CANDIDATE_CATEGORIES]
    raw_shares: dict[str, float] = {}
    for bloc in bloc_order:
        bloc_hist = df[df["bloc"] == bloc].sort_values("date_scrutin")
        last_overall = _last_share(bloc_hist, bloc)
        base_series = bloc_hist["part_bloc"]
        base_years = bloc_hist["annee"]
        # Prefer the history of the target scrutin type when available.
        if not bloc_hist.empty and target_election in bloc_hist["type_scrutin"].values:
            base_series = bloc_hist[bloc_hist["type_scrutin"] == target_election]["part_bloc"]
            base_years = bloc_hist[bloc_hist["type_scrutin"] == target_election]["annee"]

        projected = _project_share(base_series, base_years, target_year)
        if projected is None and last_overall is not None:
            projected = last_overall
        predicted = _clip01(projected or 0.0)
        raw_shares[bloc] = predicted

    # Normalise so projected shares sum to 1 (or stay all-zero).
    share_values = np.array([raw_shares.get(b, 0.0) for b in bloc_order], dtype=float)
    share_sum = share_values.sum()
    if share_sum > 0:
        share_values = share_values / share_sum
    else:
        share_values = np.zeros_like(share_values)

    # --- Per-event turnout history -------------------------------------
    event_cols = [col for col in ["code_bv", "date_scrutin", "type_scrutin", "tour", "annee"] if col in df.columns]
    event_df = df.groupby(event_cols, as_index=False).agg(
        inscrits=("inscrits", "max"),
        votants=("votants", "max"),
        blancs=("blancs", "max"),
        nuls=("nuls", "max"),
    )
    if "date_scrutin" in event_df.columns:
        event_df = event_df.sort_values("date_scrutin")
    if "type_scrutin" not in event_df.columns:
        event_df["type_scrutin"] = ""
    if "annee" not in event_df.columns:
        if "date_scrutin" in event_df.columns:
            event_df["annee"] = pd.to_datetime(event_df["date_scrutin"], errors="coerce").dt.year
        else:
            event_df["annee"] = np.nan
    base_inscrits = event_df["inscrits"].replace(0, pd.NA)
    event_df["taux_participation"] = event_df["votants"] / base_inscrits
    event_df["taux_blancs"] = event_df["blancs"] / base_inscrits
    event_df["taux_nuls"] = event_df["nuls"] / base_inscrits

    def _select_series(col: str) -> tuple[pd.Series, pd.Series]:
        # Prefer first-round events, then the target scrutin type.
        scoped = event_df
        if "tour" in event_df.columns:
            round1 = event_df[event_df["tour"] == 1]
            if not round1.empty:
                scoped = round1
        if not scoped.empty and target_election in scoped["type_scrutin"].values:
            mask = scoped["type_scrutin"] == target_election
            return scoped.loc[mask, col], scoped.loc[mask, "annee"]
        return scoped[col], scoped["annee"]

    turnout_series, turnout_years = _select_series("taux_participation")
    blancs_series, blancs_years = _select_series("taux_blancs")
    nuls_series, nuls_years = _select_series("taux_nuls")

    taux_participation = _project_rate(turnout_series, turnout_years, target_year)
    taux_blancs = _project_rate(blancs_series, blancs_years, target_year)
    taux_nuls = _project_rate(nuls_series, nuls_years, target_year)

    # --- Registered-voter base -----------------------------------------
    inscrits_used = None
    if inscrits_override is not None:
        try:
            value = float(inscrits_override)
            if value > 0:
                inscrits_used = value
        except (TypeError, ValueError):
            inscrits_used = None
    if inscrits_used is None:
        inscrits_used = _last_value(event_df["inscrits"])
    if inscrits_used is None:
        return PredictionSummary([], None, None, None, None, None, None)

    if taux_participation is None:
        taux_participation = 0.0
    if taux_blancs is None:
        taux_blancs = 0.0
    if taux_nuls is None:
        taux_nuls = 0.0

    # Blancs + nuls cannot exceed participation: rescale proportionally.
    if taux_blancs + taux_nuls > taux_participation and (taux_blancs + taux_nuls) > 0:
        scale = taux_participation / (taux_blancs + taux_nuls)
        taux_blancs *= scale
        taux_nuls *= scale

    # --- Integer totals -------------------------------------------------
    inscrits_total = int(round(inscrits_used))
    votants_total = int(round(inscrits_total * taux_participation))
    blancs_total = int(round(inscrits_total * taux_blancs))
    nuls_total = int(round(inscrits_total * taux_nuls))
    # Re-check after rounding: keep blancs + nuls within votants.
    if blancs_total + nuls_total > votants_total and (blancs_total + nuls_total) > 0:
        scale = votants_total / (blancs_total + nuls_total)
        blancs_total = int(round(blancs_total * scale))
        nuls_total = int(round(nuls_total * scale))
    exprimes_total = max(0, votants_total - blancs_total - nuls_total)
    abstention_total = max(0, inscrits_total - votants_total)

    bloc_counts = _allocate_counts(share_values.tolist(), exprimes_total)
    bloc_predictions: list[PredictionResult] = []
    for bloc, share, count in zip(bloc_order, share_values.tolist(), bloc_counts):
        bloc_predictions.append(
            PredictionResult(
                category=bloc,
                predicted_share=float(share),
                predicted_count=int(count),
            )
        )

    return PredictionSummary(
        bloc_predictions=bloc_predictions,
        inscrits=inscrits_total,
        votants=votants_total,
        blancs=blancs_total,
        nuls=nuls_total,
        abstention=abstention_total,
        exprimes=exprimes_total,
    )
292
+
293
+
294
def predictions_as_dataframe(summary: PredictionSummary) -> pd.DataFrame:
    """Flatten a PredictionSummary into a (categorie, nombre) table."""
    if summary is None or not summary.bloc_predictions:
        return pd.DataFrame(columns=["categorie", "nombre"])
    by_bloc = {item.category: item for item in summary.bloc_predictions}
    # Blocs first, in canonical display order.
    rows = [
        {"categorie": bloc, "nombre": int(by_bloc[bloc].predicted_count)}
        for bloc in DISPLAY_BLOC_ORDER
        if bloc in by_bloc
    ]
    # Then the non-bloc categories, when known.
    for label, value in (
        ("blancs", summary.blancs),
        ("nuls", summary.nuls),
        ("abstention", summary.abstention),
    ):
        if value is not None:
            rows.append({"categorie": label, "nombre": int(value)})
    return pd.DataFrame(rows)
309
+
310
+
311
+ __all__ = ["compute_predictions", "predictions_as_dataframe", "PredictionResult", "PredictionSummary"]