stephmnt commited on
Commit
46f9144
·
verified ·
1 Parent(s): 6197f01

Sync from GitHub Actions

Browse files
.env.example ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ POSTGRES_USER=sete_admin
2
+ POSTGRES_PASSWORD=sete_password
3
+ POSTGRES_DB=elections
4
+ POSTGRES_PORT=5432
5
+ POSTGRES_HOST=localhost
6
+ # Option directe si vous préférez définir l'URL complète :
7
+ # DATABASE_URL=postgresql+psycopg2://sete_admin:sete_password@localhost:5432/elections
.gitignore ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Spécifique à ce projet
2
+ .DS_Store
3
+ *.code-workspace
4
+ *.pdf
5
+ /output/
6
+ questions.md
7
+ /reports/
8
+ /data/external/
9
+ /data/raw/
10
+ /datasets/
11
+ /data/processed/
12
+ /data/contours-france-entiere-latest-v2.geojson
13
+ data/interim/*
14
+ !data/interim/elections_long.parquet
15
+ runtime.txt
16
+ /logs/
17
+ .vscode
18
+ supports/
19
+ # Hugging Face
20
+ .hf/
21
+ .huggingface/
22
+ # vim
23
+ *.swp
24
+ *.swo
25
+
26
+ ## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
27
+
28
+ # Byte-compiled / optimized / DLL files
29
+ __pycache__/
30
+ *.py[cod]
31
+ *$py.class
32
+
33
+ # C extensions
34
+ *.so
35
+
36
+ # Distribution / packaging
37
+ .Python
38
+ build/
39
+ develop-eggs/
40
+ dist/
41
+ downloads/
42
+ eggs/
43
+ .eggs/
44
+ lib/
45
+ lib64/
46
+ parts/
47
+ sdist/
48
+ var/
49
+ wheels/
50
+ share/python-wheels/
51
+ *.egg-info/
52
+ .installed.cfg
53
+ *.egg
54
+ MANIFEST
55
+
56
+ # PyInstaller
57
+ # Usually these files are written by a python script from a template
58
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
59
+ *.manifest
60
+ *.spec
61
+
62
+ # Installer logs
63
+ pip-log.txt
64
+ pip-delete-this-directory.txt
65
+
66
+ # Unit test / coverage reports
67
+ htmlcov/
68
+ .tox/
69
+ .nox/
70
+ .coverage
71
+ .coverage.*
72
+ .cache
73
+ nosetests.xml
74
+ coverage.xml
75
+ *.cover
76
+ *.py,cover
77
+ .hypothesis/
78
+ .pytest_cache/
79
+ cover/
80
+
81
+ # Translations
82
+ *.mo
83
+ *.pot
84
+
85
+ # Django stuff:
86
+ *.log
87
+ local_settings.py
88
+ db.sqlite3
89
+ db.sqlite3-journal
90
+
91
+ # Flask stuff:
92
+ instance/
93
+ .webassets-cache
94
+
95
+ # Scrapy stuff:
96
+ .scrapy
97
+
98
+ # PyBuilder
99
+ .pybuilder/
100
+ target/
101
+
102
+ # Jupyter Notebook
103
+ .ipynb_checkpoints
104
+
105
+ # IPython
106
+ profile_default/
107
+ ipython_config.py
108
+
109
+ # pyenv
110
+ # For a library or package, you might want to ignore these files since the code is
111
+ # intended to run in multiple environments; otherwise, check them in:
112
+ # .python-version
113
+
114
+ # pipenv
115
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
116
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
117
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
118
+ # install all needed dependencies.
119
+ #Pipfile.lock
120
+
121
+ # UV
122
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
123
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
124
+ # commonly ignored for libraries.
125
+ #uv.lock
126
+
127
+ # poetry
128
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
129
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
130
+ # commonly ignored for libraries.
131
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
132
+ #poetry.lock
133
+
134
+ # pdm
135
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
136
+ #pdm.lock
137
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
138
+ # in version control.
139
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
140
+ .pdm.toml
141
+ .pdm-python
142
+ .pdm-build/
143
+
144
+ # pixi
145
+ # pixi.lock should be committed to version control for reproducibility
146
+ # .pixi/ contains the environments and should not be committed
147
+ .pixi/
148
+
149
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
150
+ __pypackages__/
151
+
152
+ # Celery stuff
153
+ celerybeat-schedule
154
+ celerybeat.pid
155
+
156
+ # SageMath parsed files
157
+ *.sage.py
158
+
159
+ # Environments
160
+ .env
161
+ .venv
162
+ env/
163
+ venv/
164
+ ENV/
165
+ env.bak/
166
+ venv.bak/
167
+
168
+ # Spyder project settings
169
+ .spyderproject
170
+ .spyproject
171
+
172
+ # Rope project settings
173
+ .ropeproject
174
+
175
+ # mypy
176
+ .mypy_cache/
177
+ .dmypy.json
178
+ dmypy.json
179
+
180
+ # Pyre type checker
181
+ .pyre/
182
+
183
+ # pytype static type analyzer
184
+ .pytype/
185
+
186
+ # Cython debug symbols
187
+ cython_debug/
188
+
189
+ # PyCharm
190
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
191
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
192
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
193
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
194
+ #.idea/
195
+
196
+ # Ruff stuff:
197
+ .ruff_cache/
198
+
199
+ # PyPI configuration file
200
+ .pypirc
README.md CHANGED
@@ -9,4 +9,340 @@ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  pinned: false
10
  ---
11
 
12
+ # Elections Sète - Prévision municipales
13
+
14
+ Pipeline complet pour harmoniser les données électorales, construire un dataset panel sans fuite temporelle, entraîner des modèles multi-blocs, charger l'historique dans PostgreSQL et exposer des résultats via Gradio.
15
+
16
+ ## Installation
17
+ - Python 3.10+ recommandé.
18
+ - `python3 -m venv .venv && source .venv/bin/activate`
19
+ - `pip install -r requirements.txt`
20
+
21
+ ## Fichiers YAML (configuration)
22
+ ### `config/communes.yaml`
23
+ Ce fichier définit **les communes à inclure** (codes INSEE). Il est consommé par le pipeline (`src.pipeline.run_full_pipeline`) pour filtrer les données au niveau commune.
24
+
25
+ Formats acceptés (les codes sont normalisés en 5 chiffres) :
26
+ ```yaml
27
+ communes:
28
+ "34301": "Sète"
29
+ "34172": "Frontignan"
30
+ ```
31
+ ou
32
+ ```yaml
33
+ communes:
34
+ - code_insee: "34301"
35
+ nom: "Sète"
36
+ - "34172"
37
+ ```
38
+ Si tu modifies ce fichier, il faut **relancer le pipeline** pour régénérer les données filtrées.
39
+
40
+ ### `config/raw_sources.yaml`
41
+ Description des fichiers bruts et de leur structure (colonnes, séparateur, métadonnées).
42
+ C'est **le point d'entrée** pour ajouter un nouveau CSV au pipeline.
43
+
44
+ Exemple (copie d'une élection précédente + ajustements) :
45
+ ```yaml
46
+ 24_L_T1.csv:
47
+ copy_from: 22_L_T1.csv
48
+ date_scrutin: "2024-06-30"
49
+ code_bv_cols: ["Code commune", "Code BV"]
50
+ rename_map:
51
+ Nuance Liste: code_candidature
52
+ Libellé Abrégé Liste: nom_candidature
53
+ ```
54
+
55
+ ### `config/nuances.yaml`
56
+ Mapping des nuances vers les blocs politiques (avec overrides).
57
+ Par défaut, le mapping CSV historique est réutilisé et on peut **surcharger** ou **ajouter** des nuances :
58
+ ```yaml
59
+ base_mapping: data/mapping_candidats_blocs.csv
60
+ overrides:
61
+ - code_candidature: "XYZ"
62
+ nom_candidature: "Exemple"
63
+ blocs: [gauche_modere, centre]
64
+ ```
65
+
66
+ ### `docker-compose.yml`
67
+ Fichier YAML pour démarrer PostgreSQL (et éventuellement pgAdmin). Utilisé par :
68
+ ```bash
69
+ docker-compose up -d postgres
70
+ docker-compose --profile admin up
71
+ ```
72
+
73
+ ## 1. Prétraitement (harmonisation)
74
+ ```bash
75
+ # Harmonisation des CSV bruts -> data/interim/elections_long.parquet
76
+ python -m src.data.preprocess --raw-dir data/raw --output-dir data/interim
77
+ ```
78
+ Par défaut, le prétraitement lit `config/raw_sources.yaml`. Tu peux surcharger via `--meta-config`.
79
+
80
+ ## 2. Pipeline communes + features (optionnel mais recommandé si tu filtres par communes)
81
+ Le pipeline applique le filtre `config/communes.yaml` et génère `data/processed/elections_blocs.*`.
82
+ À lancer depuis un notebook ou un petit script :
83
+ ```bash
84
+ python3 - <<'PY'
85
+ from pathlib import Path
86
+ from src.pipeline import run_full_pipeline
87
+
88
+ run_full_pipeline(
89
+ elections_long_path=Path("data/interim/elections_long.parquet"),
90
+ mapping_path=Path("config/nuances.yaml"),
91
+ output_dir=Path("data/processed"),
92
+ target_communes_path=Path("config/communes.yaml"),
93
+ )
94
+ PY
95
+ ```
96
+
97
+ ## 3. Construction du panel (features + cibles)
98
+ ```bash
99
+ python -m src.features.build_features \
100
+ --elections-long data/interim/elections_long.parquet \
101
+ --mapping config/nuances.yaml \
102
+ --output data/processed/panel.parquet
103
+ ```
104
+ Le dictionnaire de données est généré dans `data/processed/data_dictionary.md`.
105
+
106
+ Note : `src.features.build_features` **ne filtre pas** via `config/communes.yaml`. Si tu veux limiter l'entraînement à certaines communes, filtre `elections_long` en amont ou adapte le pipeline.
107
+
108
+ ## 4. Base PostgreSQL
109
+ ```bash
110
+ cp .env.example .env
111
+ docker-compose up -d postgres # pgAdmin en option: `docker-compose --profile admin up`
112
+
113
+ # Ingestion du panel dans le schéma normalisé
114
+ python -m src.db.ingest --input data/processed/panel.parquet
115
+ ```
116
+ Le schéma est défini dans `src/db/schema.py`.
117
+
118
+ ## 5. Entraînement & évaluation
119
+ Commande demandée (CV stricte par scrutin) :
120
+ ```bash
121
+ python3 -m src.model.train --cv-splits 4 --models hist_gradient_boosting
122
+ ```
123
+
124
+ Options principales :
125
+ - `--panel` : chemin du panel (`data/processed/panel.parquet` par défaut).
126
+ - `--models-dir` / `--reports-dir` : sorties modèles et rapports.
127
+ - `--train-end-year`, `--valid-end-year`, `--test-start-year` : split temporel.
128
+ - `--cv-splits` : nb de folds temporels (par scrutin).
129
+ - `--no-tune` : désactive la grille d'hyperparamètres.
130
+ - `--max-trials` : limite le nombre d'essais par modèle.
131
+ - `--models` : liste de modèles à tester (ex: `ridge`, `hist_gradient_boosting`, `lightgbm`, `xgboost`, `two_stage_hgb`, `catboost`).
132
+
133
+ Sorties :
134
+ - Modèle + preprocessor : `models/<nom>.joblib` et `models/feature_columns.json`
135
+ - Modèle sélectionné : `models/best_model.json`
136
+ - Rapport métriques : `reports/metrics.json` et `reports/metrics.md`
137
+ - CV détaillée : `reports/cv_summary.csv`
138
+ - Figure : `reports/figures/mae_per_category.png`
139
+ - Model card : `models/model_card.md`
140
+
141
+ ## 6. Génération de prédictions hors ligne
142
+ ```bash
143
+ python -m src.model.predict \
144
+ --model-path models/hist_gradient_boosting.joblib \
145
+ --target-election-type municipales \
146
+ --target-year 2026 \
147
+ --commune-code 34301
148
+ # -> predictions/pred_municipales_2026_sete.csv
149
+ ```
150
+ Cette commande produit des **parts (%)** et des deltas vs législatives et municipales 2020.
151
+
152
+ ## 7. Application Gradio
153
+ ```bash
154
+ python -m app.gradio_app
155
+ ```
156
+ Comportement :
157
+ - Backend PostgreSQL si disponible, sinon fallback fichiers locaux.
158
+ - **Historique** : consultation bureau par bureau (pas de ML).
159
+ - **Prédiction** : parts par bloc converties en **comptes** (personnes) + `blancs`, `nuls`, `abstentions`.
160
+ - `inscrits` peut être fourni par l'utilisateur (sinon valeur historique la plus récente du bureau).
161
+ - Cibles proposées : municipales 2026 (tour 1), législatives 2027 (tour 1), présidentielles 2027 (tour 1).
162
+
163
+ ## Structure des données
164
+ - Configurations : `config/`
165
+ - Bruts : `data/raw/`
166
+ - Long harmonisé : `data/interim/elections_long.parquet`
167
+ - Élections blocs (filtrées) : `data/processed/elections_blocs.parquet`
168
+ - Stats communales par scrutin : `data/processed/commune_event_stats.parquet`
169
+ - Panel features+cibles : `data/processed/panel.parquet`
170
+ - Mapping nuances -> catégories : `config/nuances.yaml` (base: `data/mapping_candidats_blocs.csv`)
171
+
172
+ ## Notes
173
+ - Aucune fuite temporelle : les features sont calculées uniquement sur des scrutins strictement antérieurs à la cible.
174
+ - Les parts sont clipées à [0, 1] puis renormalisées.
175
+ - Les blancs/nuls dépendent des colonnes disponibles dans l'historique ; si une source ne les fournit pas, ils seront à 0.
176
+
177
+ ## Inventaire des fichiers (snapshot)
178
+ Statuts :
179
+ - `actif` : utilisé par le pipeline actuel.
180
+ - `généré` : produit par le pipeline/entraînement (recréable).
181
+ - `hérité (début projet)` : ancien fichier ou prototype.
182
+ - `optionnel` : utile mais non requis au runtime.
183
+ - `système (inutile)` : métadonnées OS.
184
+
185
+ | Fichier | Fonction | Statut |
186
+ |---|---|---|
187
+ | `.DS_Store` | Métadonnées macOS | système (inutile) |
188
+ | `.env.example` | Template des variables d'environnement (DB) | actif |
189
+ | `.gitignore` | Règles gitignore | actif |
190
+ | `Elections_Sete.code-workspace` | Config VSCode (workspace) | optionnel |
191
+ | `README.md` | Documentation projet | actif |
192
+ | `app/__init__.py` | Package app (init) | actif |
193
+ | `app/app.py` | Ancienne app Gradio (bv_features.parquet) | hérité (début projet) |
194
+ | `app/gradio_app.py` | Application Gradio principale | actif |
195
+ | `app.py` | Ancienne interface Gradio (compute_predictions) | hérité (début projet) |
196
+ | `catboost_info/catboost_training.json` | Artefacts CatBoost (logs/metrics) | généré |
197
+ | `catboost_info/learn/events.out.tfevents` | Artefacts CatBoost (logs/metrics) | généré |
198
+ | `catboost_info/learn_error.tsv` | Artefacts CatBoost (logs/metrics) | généré |
199
+ | `catboost_info/time_left.tsv` | Artefacts CatBoost (logs/metrics) | généré |
200
+ | `config/communes.yaml` | Liste des communes cibles (codes INSEE) | actif |
201
+ | `config/nuances.yaml` | Overrides mapping nuances -> blocs | actif |
202
+ | `config/raw_sources.yaml` | Schéma des CSV bruts (meta-config) | actif |
203
+ | `data/.DS_Store` | Métadonnées macOS | système (inutile) |
204
+ | `data/contours-france-entiere-latest-v2.geojson` | Fond cartographique (geojson) | optionnel |
205
+ | `data/interim/.DS_Store` | Métadonnées macOS | système (inutile) |
206
+ | `data/interim/candidates_long.parquet` | Données intermédiaires long format | généré |
207
+ | `data/interim/elections_long.csv` | Données intermédiaires long format | généré |
208
+ | `data/interim/elections_long.parquet` | Données intermédiaires long format | généré |
209
+ | `data/interim/frames_std/14_EU.parquet` | Intermédiaire standardisé par scrutin | généré |
210
+ | `data/interim/frames_std/14_MN14_T1T2.parquet` | Intermédiaire standardisé par scrutin | généré |
211
+ | `data/interim/frames_std/17_L_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
212
+ | `data/interim/frames_std/17_L_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
213
+ | `data/interim/frames_std/17_PR_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
214
+ | `data/interim/frames_std/17_PR_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
215
+ | `data/interim/frames_std/19_EU.parquet` | Intermédiaire standardisé par scrutin | généré |
216
+ | `data/interim/frames_std/20_MN_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
217
+ | `data/interim/frames_std/20_MN_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
218
+ | `data/interim/frames_std/21_DEP_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
219
+ | `data/interim/frames_std/21_DEP_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
220
+ | `data/interim/frames_std/21_REG_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
221
+ | `data/interim/frames_std/21_REG_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
222
+ | `data/interim/frames_std/22_L_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
223
+ | `data/interim/frames_std/22_L_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
224
+ | `data/interim/frames_std/22_PR_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
225
+ | `data/interim/frames_std/22_PR_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
226
+ | `data/interim/frames_std/24_EU.parquet` | Intermédiaire standardisé par scrutin | généré |
227
+ | `data/interim/harmonized/14_EU_harmonized.csv` | CSV harmonisé par scrutin | généré |
228
+ | `data/interim/harmonized/14_MN14_T1T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
229
+ | `data/interim/harmonized/17_L_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
230
+ | `data/interim/harmonized/17_L_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
231
+ | `data/interim/harmonized/17_PR_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
232
+ | `data/interim/harmonized/17_PR_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
233
+ | `data/interim/harmonized/19_EU_harmonized.csv` | CSV harmonisé par scrutin | généré |
234
+ | `data/interim/harmonized/20_MN_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
235
+ | `data/interim/harmonized/20_MN_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
236
+ | `data/interim/harmonized/21_DEP_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
237
+ | `data/interim/harmonized/21_DEP_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
238
+ | `data/interim/harmonized/21_REG_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
239
+ | `data/interim/harmonized/21_REG_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
240
+ | `data/interim/harmonized/22_L_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
241
+ | `data/interim/harmonized/22_L_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
242
+ | `data/interim/harmonized/22_PR_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
243
+ | `data/interim/harmonized/22_PR_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
244
+ | `data/interim/harmonized/24_EU_harmonized.csv` | CSV harmonisé par scrutin | généré |
245
+ | `data/interim/unmapped_nuances.csv` | Données intermédiaires long format | généré |
246
+ | `data/mapping_candidats_blocs.csv` | Mapping nuances -> blocs (base) | actif |
247
+ | `data/mappings/category_mapping.csv` | Copie/variante de mapping | hérité (début projet) |
248
+ | `data/processed/bv_features.parquet` | Features legacy (utilisées par app/app.py) | hérité (début projet) |
249
+ | `data/processed/data_dictionary.md` | Dictionnaire de données généré | généré (doc) |
250
+ | `data/processed/elections_blocs.csv` | Dataset blocs (filtré communes) | généré (utilisé) |
251
+ | `data/processed/elections_blocs.parquet` | Dataset blocs (filtré communes) | généré (utilisé) |
252
+ | `data/processed/history_cache.parquet` | Cache local (historique/prédictions) | généré (cache) |
253
+ | `data/processed/panel.csv` | Panel features+cibles | généré (utilisé) |
254
+ | `data/processed/panel.parquet` | Panel features+cibles | généré (utilisé) |
255
+ | `data/processed/predictions_cache.parquet` | Cache local (historique/prédictions) | généré (cache) |
256
+ | `data/processed/predictions_municipales_2026.csv` | Exports de prédictions | généré (résultats) |
257
+ | `data/processed/predictions_municipales_2026_blocs.csv` | Exports de prédictions | généré (résultats) |
258
+ | `data/processed/predictions_municipales_sete_2026.csv` | Exports de prédictions | généré (résultats) |
259
+ | `data/raw/14_EU.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
260
+ | `data/raw/14_MN14_T1T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
261
+ | `data/raw/17_L_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
262
+ | `data/raw/17_L_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
263
+ | `data/raw/17_PR_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
264
+ | `data/raw/17_PR_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
265
+ | `data/raw/19_EU.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
266
+ | `data/raw/20_MN_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
267
+ | `data/raw/20_MN_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
268
+ | `data/raw/21_DEP_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
269
+ | `data/raw/21_DEP_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
270
+ | `data/raw/21_REG_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
271
+ | `data/raw/21_REG_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
272
+ | `data/raw/22_L_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
273
+ | `data/raw/22_L_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
274
+ | `data/raw/22_PR_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
275
+ | `data/raw/22_PR_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
276
+ | `data/raw/24_EU.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
277
+ | `data/raw/24_L_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
278
+ | `data/raw/24_L_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
279
+ | `datasets/.DS_Store` | Métadonnées macOS | système (inutile) |
280
+ | `datasets/14_EU.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
281
+ | `datasets/14_MN14_T1T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
282
+ | `datasets/17_L_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
283
+ | `datasets/17_L_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
284
+ | `datasets/17_PR_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
285
+ | `datasets/17_PR_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
286
+ | `datasets/19_EU.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
287
+ | `datasets/20_MN_T1.tsv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
288
+ | `datasets/20_MN_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
289
+ | `datasets/21_DEP_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
290
+ | `datasets/21_DEP_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
291
+ | `datasets/21_REG_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
292
+ | `datasets/21_REG_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
293
+ | `datasets/22_L_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
294
+ | `datasets/22_L_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
295
+ | `datasets/22_PR_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
296
+ | `datasets/22_PR_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
297
+ | `datasets/24_EU.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
298
+ | `datasets/24_L_T1T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
299
+ | `datasets/24_L_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
300
+ | `docker-compose.yml` | Services Docker (PostgreSQL/pgAdmin) | actif |
301
+ | `harmoniser.md` | Notes d'harmonisation | optionnel |
302
+ | `main.py` | Orchestrateur pipeline (CLI utilitaire) | optionnel |
303
+ | `mission.md` | Backlog / notes projet | optionnel |
304
+ | `models/best_model.json` | Nom du meilleur modèle | généré (utilisé) |
305
+ | `models/feature_columns.json` | Liste des features du modèle | généré (utilisé) |
306
+ | `models/hist_gradient_boosting.joblib` | Modèle entraîné | généré (utilisé) |
307
+ | `models/model_card.md` | Model card (synthèse) | généré (doc) |
308
+ | `notebooks/01_pretraitement.ipynb` | Notebook d'analyse / exploration | optionnel (exploration) |
309
+ | `notebooks/02_feature_engineering.ipynb` | Notebook d'analyse / exploration | optionnel (exploration) |
310
+ | `notebooks/03_modelisation_prediction.ipynb` | Notebook d'analyse / exploration | optionnel (exploration) |
311
+ | `notebooks/aed.ipynb` | Notebook d'analyse / exploration | optionnel (exploration) |
312
+ | `notebooks/catboost_info/catboost_training.json` | Artefacts CatBoost (notebook) | généré |
313
+ | `notebooks/catboost_info/learn/events.out.tfevents` | Artefacts CatBoost (notebook) | généré |
314
+ | `notebooks/catboost_info/learn_error.tsv` | Artefacts CatBoost (notebook) | généré |
315
+ | `notebooks/catboost_info/time_left.tsv` | Artefacts CatBoost (notebook) | généré |
316
+ | `output/.DS_Store` | Métadonnées macOS | système (inutile) |
317
+ | `output/Sans titre 2.png` | Exports graphiques | hérité (début projet) |
318
+ | `output/Sans titre 3.png` | Exports graphiques | hérité (début projet) |
319
+ | `output/Sans titre 4.png` | Exports graphiques | hérité (début projet) |
320
+ | `output/Sans titre 5.png` | Exports graphiques | hérité (début projet) |
321
+ | `output/Sans titre 6.png` | Exports graphiques | hérité (début projet) |
322
+ | `output/Sans titre.png` | Exports graphiques | hérité (début projet) |
323
+ | `output/output.png` | Exports graphiques | hérité (début projet) |
324
+ | `predictions/pred_municipales_2026_sete.csv` | Exports de prédictions | généré (résultats) |
325
+ | `reports/colonnes_comparatif.csv` | Rapport / métriques | généré |
326
+ | `reports/cv_summary.csv` | Rapport / métriques | généré |
327
+ | `reports/figures/mae_per_category.png` | Figures de rapports | généré |
328
+ | `reports/metrics.json` | Rapport / métriques | généré |
329
+ | `reports/metrics.md` | Rapport / note analytique | généré (doc) |
330
+ | `reports/notebook_audit.md` | Rapport / note analytique | généré (doc) |
331
+ | `requirements.txt` | Dépendances Python | actif |
332
+ | `src/__init__.py` | Package src (init) | actif |
333
+ | `src/constants.py` | Constantes projet | actif |
334
+ | `src/data/__init__.py` | Module data | actif |
335
+ | `src/data/preprocess.py` | Prétraitement/harmonisation | actif |
336
+ | `src/data_prep.py` | Librairie d'harmonisation des données | actif |
337
+ | `src/database.py` | Accès base SQL (fallback/app) | actif |
338
+ | `src/db/__init__.py` | Module DB | actif |
339
+ | `src/db/ingest.py` | Ingestion PostgreSQL | actif |
340
+ | `src/db/schema.py` | Schéma PostgreSQL | actif |
341
+ | `src/features/__init__.py` | Module features | actif |
342
+ | `src/features/build_features.py` | Construction du panel features+cibles | actif |
343
+ | `src/model/predict.py` | Prédiction hors ligne | actif |
344
+ | `src/model/train.py` | Entraînement + CV | actif |
345
+ | `src/pipeline.py` | Pipeline de construction (blocs + stats) | actif |
346
+ | `src/prediction.py` | Prédiction legacy (app.py) | hérité (début projet) |
347
+ | `supports/Plan-2024_Bureaux-de-vote.pdf` | Documents de référence | optionnel |
348
+ | `supports/zonages_admin_canton.pdf` | Documents de référence | optionnel |
app.py CHANGED
@@ -1,7 +1,22 @@
1
- import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
 
3
+ import importlib.util
4
+ from pathlib import Path
5
 
6
+
7
+ def _load_gradio_module():
8
+ module_path = Path(__file__).resolve().parent / "app" / "gradio_app.py"
9
+ spec = importlib.util.spec_from_file_location("gradio_app_module", module_path)
10
+ if spec is None or spec.loader is None:
11
+ raise RuntimeError(f"Impossible de charger {module_path}")
12
+ module = importlib.util.module_from_spec(spec)
13
+ spec.loader.exec_module(module)
14
+ return module
15
+
16
+
17
+ _gradio = _load_gradio_module()
18
+ demo = _gradio.create_interface()
19
+
20
+
21
+ if __name__ == "__main__":
22
+ demo.launch(server_name="0.0.0.0", server_port=7860)
app/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Gradio application package.
3
+ """
app/app.py ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+
6
+ # =========================
7
+ # Chargement des données
8
+ # =========================
9
+
10
+ DATA_PATH = "data/processed/bv_features.parquet"
11
+
12
+ df = pd.read_parquet(DATA_PATH)
13
+ df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce") # type: ignore
14
+ df["tour"] = pd.to_numeric(df.get("tour"), errors="coerce").astype("Int64") # type: ignore
15
+
16
+ # -------------------------
17
+ # Filtrage Sète uniquement
18
+ # -------------------------
19
+ # Hypothèse : code_commune INSEE
20
+ SETE_CODE_INSEE = "34301"
21
+
22
+ def resolve_code_commune(df_in: pd.DataFrame) -> tuple[pd.DataFrame, str | None]:
23
+ df_out = df_in.copy()
24
+ if "code_commune" in df_out.columns:
25
+ df_out["code_commune"] = df_out["code_commune"].astype("string")
26
+ return df_out, None
27
+ if "Code de la commune" in df_out.columns:
28
+ df_out = df_out.rename(columns={"Code de la commune": "code_commune"})
29
+ df_out["code_commune"] = df_out["code_commune"].astype("string")
30
+ return df_out, None
31
+ if "code_bv" in df_out.columns:
32
+ df_out["code_commune"] = df_out["code_bv"].astype(str).str.slice(0, 5)
33
+ df_out["code_commune"] = df_out["code_commune"].astype("string")
34
+ valid = df_out["code_commune"].str.len() == 5
35
+ if not valid.any():
36
+ return df_out, "Impossible de dériver code_commune depuis code_bv (format inattendu)."
37
+ return df_out, None
38
+ df_out["code_commune"] = pd.NA
39
+ return df_out, "Aucune colonne commune disponible (code_commune/Code de la commune/code_bv)."
40
+
41
+
42
+ df, commune_warning = resolve_code_commune(df)
43
+ df["code_commune"] = (
44
+ df["code_commune"]
45
+ .astype(str)
46
+ .str.replace(".0", "", regex=False)
47
+ .str.replace(r"\D", "", regex=True)
48
+ .str.zfill(5)
49
+ .astype("string")
50
+ )
51
+ df_sete = df[df["code_commune"] == SETE_CODE_INSEE].copy()
52
+ df_sete["tour"] = pd.to_numeric(df_sete["tour"], errors="coerce").astype("Int64")
53
+
54
+ # Colonnes blocs
55
+ BASE_BLOCS = [
56
+ "droite_modere",
57
+ "gauche_modere",
58
+ "gauche_dure",
59
+ "droite_dure",
60
+ "centre",
61
+ "extreme_gauche",
62
+ "extreme_droite",
63
+ "autre",
64
+ ]
65
+ BLOC_LABELS = [b for b in BASE_BLOCS if f"part_bloc_{b}" in df_sete.columns]
66
+ BLOC_COLS = [f"part_bloc_{b}" for b in BLOC_LABELS]
67
+
68
+ # =========================
69
+ # Fonctions métier
70
+ # =========================
71
+
72
+ def compute_national_reference(df_all, type_scrutin, tour):
73
+ """
74
+ Calcule les parts nationales par bloc pour un scrutin et un tour donnés.
75
+ """
76
+ if not BLOC_COLS:
77
+ return {}
78
+ df_nat = df_all[
79
+ (df_all["type_scrutin"] == type_scrutin)
80
+ & (df_all["tour"] == tour)
81
+ ]
82
+
83
+ # pondération par exprimés
84
+ weights = df_nat["exprimes"].replace(0, np.nan)
85
+
86
+ national = {}
87
+ for col in BLOC_COLS:
88
+ national[col] = np.nansum(df_nat[col] * weights) / np.nansum(weights)
89
+
90
+ return national
91
+
92
+
93
+ def table_sete(type_scrutin, tour):
94
+ if not BLOC_COLS:
95
+ return pd.DataFrame({"info": ["Colonnes part_bloc_* absentes."]})
96
+ tour_val = pd.to_numeric(tour, errors="coerce")
97
+ if pd.isna(tour_val):
98
+ return pd.DataFrame({"info": ["Tour invalide."]})
99
+ # données locales
100
+ local = df_sete[
101
+ (df_sete["type_scrutin"] == type_scrutin)
102
+ & (df_sete["tour"] == int(tour_val))
103
+ ].copy()
104
+
105
+ if local.empty:
106
+ return pd.DataFrame({"info": ["Aucune donnée disponible"]})
107
+
108
+ # référence nationale
109
+ nat = compute_national_reference(df, type_scrutin, tour)
110
+
111
+ # construction tableau affiché
112
+ rows = []
113
+
114
+ for _, row in local.iterrows():
115
+ r = {
116
+ "code_bv": row["code_bv"],
117
+ "nom_bv": row.get("nom_bv", ""),
118
+ }
119
+
120
+ for col in BLOC_COLS:
121
+ part = row[col]
122
+ ecart = part - nat.get(col, 0)
123
+
124
+ r[col.replace("part_bloc_", "")] = round(part * 100, 2)
125
+ r[col.replace("part_bloc_", "") + "_ecart_nat"] = round(ecart * 100, 2)
126
+
127
+ rows.append(r)
128
+
129
+ result = pd.DataFrame(rows)
130
+
131
+ # tri par écart extrême droite (exemple)
132
+ if "extreme_droite_ecart_nat" in result.columns:
133
+ result = result.sort_values(
134
+ "extreme_droite_ecart_nat", ascending=False
135
+ )
136
+
137
+ return result
138
+
139
+
140
+ def get_bv_timeseries(code_bv: str, tour: int | None) -> pd.DataFrame:
141
+ if df_sete.empty or not BLOC_COLS:
142
+ return pd.DataFrame(columns=["date_scrutin"] + BLOC_COLS)
143
+ subset = df_sete[df_sete["code_bv"].astype(str) == str(code_bv)].copy()
144
+ subset["tour"] = pd.to_numeric(subset["tour"], errors="coerce").astype("Int64")
145
+ if tour is not None:
146
+ subset = subset[subset["tour"] == tour]
147
+ subset = subset.dropna(subset=["date_scrutin"]).sort_values("date_scrutin")
148
+ return subset[["date_scrutin"] + BLOC_COLS]
149
+
150
+
151
+ def plot_bv_timeseries(code_bv: str, tour_choice, bloc_choices=None):
152
+ tour = None if tour_choice == "Tous" else int(tour_choice)
153
+ fig, ax = plt.subplots(figsize=(8, 4))
154
+ if not BLOC_COLS:
155
+ ax.text(0.5, 0.5, "Colonnes part_bloc_* absentes.", ha="center", va="center")
156
+ ax.axis("off")
157
+ return fig
158
+ df_ts = get_bv_timeseries(code_bv, tour)
159
+ if df_ts.empty:
160
+ tours_avail = (
161
+ df_sete[df_sete["code_bv"].astype(str) == str(code_bv)]["tour"]
162
+ .dropna()
163
+ .unique()
164
+ .tolist()
165
+ )
166
+ ax.text(
167
+ 0.5,
168
+ 0.5,
169
+ f"Aucune donnée après filtre tour={tour}. Valeurs disponibles: {sorted(tours_avail)}",
170
+ ha="center",
171
+ va="center",
172
+ wrap=True,
173
+ )
174
+ ax.axis("off")
175
+ return fig
176
+
177
+ selected = bloc_choices or BLOC_LABELS
178
+ selected_cols = [f"part_bloc_{b}" for b in selected if f"part_bloc_{b}" in df_ts.columns]
179
+ if not selected_cols:
180
+ ax.text(0.5, 0.5, "Aucun bloc sélectionné.", ha="center", va="center")
181
+ ax.axis("off")
182
+ return fig
183
+ for col in selected_cols:
184
+ ax.plot(df_ts["date_scrutin"], df_ts[col], label=col.replace("part_bloc_", ""))
185
+ ax.set_title(f"Évolution politique – BV {code_bv}")
186
+ ax.set_ylabel("Part des voix (exprimés)")
187
+ ax.grid(True, alpha=0.3)
188
+ ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left", borderaxespad=0, fontsize=8)
189
+ fig.autofmt_xdate()
190
+ fig.tight_layout()
191
+ return fig
192
+
193
+
194
+ # =========================
195
+ # Interface Gradio
196
+ # =========================
197
+
198
+ def format_bv_label(code_bv: str) -> str:
199
+ code_str = str(code_bv)
200
+ if code_str.isdigit() and code_str.startswith(SETE_CODE_INSEE) and len(code_str) == 9:
201
+ bureau_num = code_str[-4:]
202
+ return f"BV {int(bureau_num)} ({code_str})"
203
+ return code_str
204
+
205
+
206
+ bv_values = (
207
+ sorted(df_sete["code_bv"].astype(str).unique().tolist())
208
+ if "code_bv" in df_sete.columns
209
+ else []
210
+ )
211
+ bv_choices = [(format_bv_label(code), code) for code in bv_values]
212
+ scrutins = sorted(df_sete["type_scrutin"].unique())
213
+ tours = sorted(df_sete["tour"].dropna().unique())
214
+ tour_options = ["Tous"] + [str(t) for t in tours]
215
+ status_messages = []
216
+ if commune_warning:
217
+ status_messages.append(commune_warning)
218
+ if df_sete.empty:
219
+ status_messages.append(
220
+ "Aucune ligne pour la commune 34301 (Sète). Vérifie `code_commune` / le filtre."
221
+ )
222
+ if not BLOC_COLS:
223
+ status_messages.append("Colonnes part_bloc_* absentes dans bv_features.")
224
+ missing_blocs = [f"part_bloc_{b}" for b in BASE_BLOCS if f"part_bloc_{b}" not in df_sete.columns]
225
+ if missing_blocs:
226
+ status_messages.append(f"Colonnes blocs manquantes: {', '.join(missing_blocs)}")
227
+ tour_dtype = str(df_sete["tour"].dtype) if "tour" in df_sete.columns else "n/a"
228
+ tour_sample = sorted(df_sete["tour"].dropna().unique().tolist())[:10]
229
+ status_messages.append(f"tour dtype: {tour_dtype}")
230
+ status_messages.append(f"tours disponibles (échantillon): {tour_sample}")
231
+ status_messages.append(
232
+ f"df_sete: {len(df_sete)} lignes, {df_sete['code_bv'].nunique() if 'code_bv' in df_sete.columns else 0} BV"
233
+ )
234
+ status_messages.append(f"blocs actifs: {', '.join(BLOC_LABELS) if BLOC_LABELS else 'aucun'}")
235
+ status_text = "\n".join(f"- {msg}" for msg in status_messages)
236
+
237
+ with gr.Blocks(title="Résultats électoraux – Bureaux de vote de Sète") as app:
238
+ gr.Markdown(
239
+ """
240
+ # 🗳️ Résultats électoraux – Ville de Sète
241
+
242
+ **Bureaux de vote uniquement – comparaison au niveau national**
243
+
244
+ Les pourcentages sont exprimés en **% des exprimés**.
245
+ Les écarts sont en **points par rapport au national**.
246
+ """
247
+ )
248
+ if status_text:
249
+ gr.Markdown(f"**Alertes**\n{status_text}")
250
+
251
+ with gr.Tabs():
252
+ with gr.Tab("Bureaux de vote"):
253
+ with gr.Row():
254
+ type_scrutin = gr.Dropdown(
255
+ scrutins,
256
+ label="Type de scrutin",
257
+ value=scrutins[0] if scrutins else None,
258
+ )
259
+ tour = gr.Dropdown(
260
+ tours,
261
+ label="Tour",
262
+ value=tours[0] if tours else None,
263
+ )
264
+
265
+ output = gr.Dataframe(
266
+ label="Bureaux de vote – parts locales et écart au national",
267
+ interactive=False,
268
+ wrap=True,
269
+ )
270
+
271
+ btn = gr.Button("Afficher")
272
+
273
+ btn.click(
274
+ fn=table_sete,
275
+ inputs=[type_scrutin, tour],
276
+ outputs=output,
277
+ )
278
+
279
+ with gr.Tab("Évolution temporelle"):
280
+ bv_selector = gr.Dropdown(
281
+ bv_choices,
282
+ label="Bureau de vote",
283
+ value=bv_values[0] if bv_values else None,
284
+ )
285
+ tour_selector = gr.Dropdown(
286
+ tour_options,
287
+ label="Tour",
288
+ value="Tous",
289
+ )
290
+ blocs_selector = gr.Dropdown(
291
+ BLOC_LABELS,
292
+ label="Blocs à afficher",
293
+ value=BLOC_LABELS,
294
+ multiselect=True,
295
+ )
296
+ plot = gr.Plot(
297
+ value=plot_bv_timeseries(
298
+ bv_values[0] if bv_values else "", "Tous", BLOC_LABELS
299
+ )
300
+ )
301
+
302
+ bv_selector.change(
303
+ fn=plot_bv_timeseries,
304
+ inputs=[bv_selector, tour_selector, blocs_selector],
305
+ outputs=plot,
306
+ )
307
+ tour_selector.change(
308
+ fn=plot_bv_timeseries,
309
+ inputs=[bv_selector, tour_selector, blocs_selector],
310
+ outputs=plot,
311
+ )
312
+ blocs_selector.change(
313
+ fn=plot_bv_timeseries,
314
+ inputs=[bv_selector, tour_selector, blocs_selector],
315
+ outputs=plot,
316
+ )
317
+
318
+ # =========================
319
+ # Lancement
320
+ # =========================
321
+ # Tests manuels:
322
+ # 1) Lancer l'app.
323
+ # 2) Onglet "Évolution temporelle": choisir un BV, tester Tous / Tour 1 / Tour 2.
324
+ # 3) Vérifier que la légende n'occulte pas les courbes et que seuls 8 blocs apparaissent.
325
+ # 4) Vérifier le libellé BV (BV X + code) et les alertes en haut de page.
326
+
327
+ if __name__ == "__main__":
328
+ app.launch()
app/gradio_app.py ADDED
@@ -0,0 +1,1645 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import io
5
+ import json
6
+ import logging
7
+ import re
8
+ import warnings
9
+ from html import escape
10
+ from pathlib import Path
11
+ from typing import Dict, Tuple
12
+
13
+ import gradio as gr
14
+ import joblib
15
+ import numpy as np
16
+ import pandas as pd
17
+ import sqlalchemy as sa
18
+
19
+ from src.constants import CANDIDATE_CATEGORIES
20
+ from src.db.schema import get_engine
21
+ from src.features.build_features import (
22
+ aggregate_by_event,
23
+ compute_national_reference,
24
+ expand_by_category,
25
+ load_elections_long,
26
+ load_mapping,
27
+ )
28
+
29
+ LOGGER = logging.getLogger(__name__)
30
+ COMMUNE_CODE_SETE = "34301"
31
+ MODEL_DIR = Path("models")
32
+ FEATURE_COLS_PATH = MODEL_DIR / "feature_columns.json"
33
+ RESIDUAL_INTERVALS_PATH = Path("reports/residual_intervals.json")
34
+ GEO_DIR = Path("data/geo")
35
+ DEFAULT_TARGETS = [
36
+ ("municipales", 2026),
37
+ ("legislatives", 2027),
38
+ ("presidentielles", 2027),
39
+ ]
40
+ FEATURE_CACHE: Dict[Tuple[str, int], Tuple[pd.DataFrame, Dict[str, Dict[Tuple[str, str], float]]]] = {}
41
+ ELECTION_KEY_SEP = "|"
42
+ ELECTION_TYPE_LABELS = {
43
+ "municipales": "Municipales",
44
+ "legislatives": "Législatives",
45
+ "presidentielles": "Présidentielles",
46
+ "europeennes": "Européennes",
47
+ "regionales": "Régionales",
48
+ "departementales": "Départementales",
49
+ }
50
+ HISTORY_OUTPUT_COLUMNS = ["categorie", "score_%"]
51
+ PREDICTION_OUTPUT_COLUMNS = ["categorie", "nombre"]
52
+ INTERVAL_OUTPUT_COLUMNS = ["categorie", "baseline_%", "min_%", "max_%", "baseline", "min", "max"]
53
+ SIM_OUTPUT_COLUMNS = ["categorie", "baseline", "apres_transfert", "delta"]
54
+ OPPORTUNITY_OUTPUT_COLUMNS = [
55
+ "bureau",
56
+ "gain_cible",
57
+ "score_base",
58
+ "score_apres",
59
+ "top_base",
60
+ "top_apres",
61
+ "bascule",
62
+ ]
63
+ DISPLAY_CATEGORY_ORDER = [
64
+ "extreme_gauche",
65
+ "gauche_dure",
66
+ "gauche_modere",
67
+ "centre",
68
+ "droite_modere",
69
+ "droite_dure",
70
+ "extreme_droite",
71
+ ]
72
+ PREDICTION_CATEGORY_ORDER = DISPLAY_CATEGORY_ORDER + ["blancs", "nuls", "abstention"]
73
+ DISPLAY_CATEGORY_LABELS = {
74
+ "extreme_gauche": "extrême-gauche",
75
+ "gauche_dure": "gauche dure",
76
+ "gauche_modere": "gauche modérée",
77
+ "centre": "centre",
78
+ "droite_modere": "droite modérée",
79
+ "droite_dure": "droite dure",
80
+ "extreme_droite": "extrême-droite",
81
+ "blancs": "blancs",
82
+ "nuls": "nuls",
83
+ "abstention": "abstentions",
84
+ }
85
+ DISPLAY_CATEGORY_COLORS = {
86
+ "extreme_gauche": "#7f1d1d",
87
+ "gauche_dure": "#dc2626",
88
+ "gauche_modere": "#f472b6",
89
+ "centre": "#facc15",
90
+ "droite_modere": "#60a5fa",
91
+ "droite_dure": "#1e3a8a",
92
+ "extreme_droite": "#111827",
93
+ }
94
+ EXTRA_CATEGORY_COLORS = {
95
+ "blancs": "#e5e7eb",
96
+ "nuls": "#9ca3af",
97
+ "abstention": "#6b7280",
98
+ }
99
+ DISPLAY_LABEL_COLORS = {
100
+ DISPLAY_CATEGORY_LABELS[key]: color for key, color in DISPLAY_CATEGORY_COLORS.items()
101
+ }
102
+ DISPLAY_LABEL_COLORS.update(
103
+ {DISPLAY_CATEGORY_LABELS[key]: color for key, color in EXTRA_CATEGORY_COLORS.items()}
104
+ )
105
+ CATEGORY_LABEL_TO_KEY = {label: key for key, label in DISPLAY_CATEGORY_LABELS.items()}
106
+ TRANSFER_CATEGORY_LABELS = [DISPLAY_CATEGORY_LABELS[key] for key in PREDICTION_CATEGORY_ORDER]
107
+ DEFAULT_RESIDUAL_SPREAD = 0.03
108
+ INTERVAL_BANDS = {
109
+ "80% (p10-p90)": ("q10", "q90"),
110
+ "90% (p05-p95)": ("q05", "q95"),
111
+ }
112
+ NEUTRAL_MARGIN_SHARE = 0.10
113
+
114
+ try:
115
+ from numpy import RankWarning as NP_RANK_WARNING # type: ignore[attr-defined]
116
+ except Exception:
117
+ class NP_RANK_WARNING(UserWarning):
118
+ pass
119
+
120
+
121
+ def ordered_categories() -> list[str]:
122
+ return [cat for cat in DISPLAY_CATEGORY_ORDER if cat in CANDIDATE_CATEGORIES]
123
+
124
+
125
+ def load_residual_intervals(path: Path = RESIDUAL_INTERVALS_PATH) -> Dict[str, object]:
126
+ if not path.exists():
127
+ return {}
128
+ try:
129
+ payload = json.loads(path.read_text())
130
+ except Exception:
131
+ return {}
132
+ if isinstance(payload, dict):
133
+ return payload
134
+ return {}
135
+
136
+
137
+ def get_interval_bounds(
138
+ residuals: Dict[str, Dict[str, float]],
139
+ category: str,
140
+ band_label: str,
141
+ ) -> Tuple[float, float]:
142
+ keys = INTERVAL_BANDS.get(band_label, ("q10", "q90"))
143
+ cat_resid = residuals.get(category, {})
144
+ low = cat_resid.get(keys[0])
145
+ high = cat_resid.get(keys[1])
146
+ if low is None or high is None:
147
+ return -DEFAULT_RESIDUAL_SPREAD, DEFAULT_RESIDUAL_SPREAD
148
+ return float(low), float(high)
149
+
150
+
151
+ def build_interval_table(
152
+ shares_by_cat: Dict[str, float],
153
+ exprimes_total: int,
154
+ residuals: Dict[str, Dict[str, float]],
155
+ band_label: str,
156
+ ) -> pd.DataFrame:
157
+ rows = []
158
+ for cat in ordered_categories():
159
+ share = float(shares_by_cat.get(cat, 0.0))
160
+ low_resid, high_resid = get_interval_bounds(residuals, cat, band_label)
161
+ share_low = float(np.clip(share + low_resid, 0.0, 1.0))
162
+ share_high = float(np.clip(share + high_resid, 0.0, 1.0))
163
+ count = int(round(share * exprimes_total))
164
+ count_low = int(round(share_low * exprimes_total))
165
+ count_high = int(round(share_high * exprimes_total))
166
+ if count_low > count_high:
167
+ count_low, count_high = count_high, count_low
168
+ share_low, share_high = share_high, share_low
169
+ rows.append(
170
+ {
171
+ "categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat),
172
+ "baseline_%": round(share * 100, 1),
173
+ "min_%": round(share_low * 100, 1),
174
+ "max_%": round(share_high * 100, 1),
175
+ "baseline": count,
176
+ "min": count_low,
177
+ "max": count_high,
178
+ }
179
+ )
180
+ return pd.DataFrame(rows, columns=INTERVAL_OUTPUT_COLUMNS)
181
+
182
+
183
+ def build_interval_chart(
184
+ df: pd.DataFrame,
185
+ *,
186
+ value_col: str = "baseline",
187
+ low_col: str = "min",
188
+ high_col: str = "max",
189
+ color_map: Dict[str, str] | None = None,
190
+ ylabel: str = "Nombre d'électeurs",
191
+ ):
192
+ try:
193
+ import matplotlib.pyplot as plt
194
+ except Exception:
195
+ return None
196
+ if df.empty or value_col not in df.columns:
197
+ return None
198
+ labels = df["categorie"].astype(str).tolist()
199
+ values = df[value_col].astype(float).to_numpy()
200
+ low_vals = df[low_col].astype(float).to_numpy()
201
+ high_vals = df[high_col].astype(float).to_numpy()
202
+ lower_err = np.maximum(0.0, values - low_vals)
203
+ upper_err = np.maximum(0.0, high_vals - values)
204
+ yerr = np.vstack([lower_err, upper_err])
205
+ colors = [color_map.get(label, "#3b82f6") for label in labels] if color_map else "#3b82f6"
206
+ plt.figure(figsize=(6, 3))
207
+ plt.bar(labels, values, color=colors, yerr=yerr, capsize=4)
208
+ plt.xticks(rotation=30, ha="right")
209
+ plt.ylabel(ylabel)
210
+ plt.tight_layout()
211
+ return plt
212
+
213
+
214
+ def apply_transfers(
215
+ counts: Dict[str, int],
216
+ total_inscrits: int,
217
+ transfers: list[Tuple[str, str, float]],
218
+ ) -> Dict[str, int]:
219
+ updated = {key: int(value) for key, value in counts.items()}
220
+ for source, target, delta_pct in transfers:
221
+ if delta_pct <= 0 or source == target:
222
+ continue
223
+ delta_count = int(round(total_inscrits * float(delta_pct) / 100.0))
224
+ if delta_count <= 0:
225
+ continue
226
+ available = max(0, int(updated.get(source, 0)))
227
+ moved = min(available, delta_count)
228
+ updated[source] = available - moved
229
+ updated[target] = int(updated.get(target, 0)) + moved
230
+ return updated
231
+
232
+
233
+ def build_simulation_table(
234
+ baseline: Dict[str, int],
235
+ updated: Dict[str, int],
236
+ ) -> pd.DataFrame:
237
+ rows = []
238
+ for cat in PREDICTION_CATEGORY_ORDER:
239
+ base = int(baseline.get(cat, 0))
240
+ new = int(updated.get(cat, 0))
241
+ rows.append(
242
+ {
243
+ "categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat),
244
+ "baseline": base,
245
+ "apres_transfert": new,
246
+ "delta": new - base,
247
+ }
248
+ )
249
+ return pd.DataFrame(rows, columns=SIM_OUTPUT_COLUMNS)
250
+
251
+
252
+ def load_geojson_features(geo_dir: Path = GEO_DIR) -> list[dict]:
253
+ if not geo_dir.exists():
254
+ return []
255
+ paths = sorted(geo_dir.glob("*.geojson")) + sorted(geo_dir.glob("*.json"))
256
+ features: list[dict] = []
257
+ for path in paths:
258
+ try:
259
+ payload = json.loads(path.read_text())
260
+ except Exception:
261
+ continue
262
+ if isinstance(payload, dict):
263
+ features.extend(payload.get("features", []))
264
+ return features
265
+
266
+
267
+ def extract_bureau_number(label: str | None) -> int | None:
268
+ if not label:
269
+ return None
270
+ match = re.search(r"(\d+)", str(label))
271
+ if not match:
272
+ return None
273
+ try:
274
+ return int(match.group(1))
275
+ except ValueError:
276
+ return None
277
+
278
+
279
+ def match_bureau_code(commune_code: str, bureau_num: int, available_codes: set[str]) -> str:
280
+ padded = str(bureau_num).zfill(4)
281
+ candidates = [f"{commune_code}-{padded}", f"{commune_code}{padded}"]
282
+ for candidate in candidates:
283
+ if candidate in available_codes:
284
+ return candidate
285
+ return candidates[-1]
286
+
287
+
288
+ def _iter_coords(geom: dict) -> list[Tuple[float, float]]:
289
+ coords = []
290
+ geom_type = geom.get("type")
291
+ if geom_type == "Polygon":
292
+ for ring in geom.get("coordinates", []):
293
+ coords.extend([(lon, lat) for lon, lat in ring])
294
+ elif geom_type == "MultiPolygon":
295
+ for polygon in geom.get("coordinates", []):
296
+ for ring in polygon:
297
+ coords.extend([(lon, lat) for lon, lat in ring])
298
+ return coords
299
+
300
+
301
+ def geojson_bounds(features: list[dict]) -> Tuple[Tuple[float, float], Tuple[float, float]] | None:
302
+ lons = []
303
+ lats = []
304
+ for feature in features:
305
+ geom = feature.get("geometry") or {}
306
+ for lon, lat in _iter_coords(geom):
307
+ lons.append(lon)
308
+ lats.append(lat)
309
+ if not lons or not lats:
310
+ return None
311
+ return (min(lats), min(lons)), (max(lats), max(lons))
312
+
313
+
314
+ def build_prediction_table_from_counts(counts_by_cat: Dict[str, int]) -> pd.DataFrame:
315
+ rows = []
316
+ for cat in ordered_categories():
317
+ rows.append({"categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat), "nombre": int(counts_by_cat.get(cat, 0))})
318
+ for extra in ["blancs", "nuls", "abstention"]:
319
+ rows.append(
320
+ {
321
+ "categorie": DISPLAY_CATEGORY_LABELS[extra],
322
+ "nombre": int(counts_by_cat.get(extra, 0)),
323
+ }
324
+ )
325
+ return pd.DataFrame(rows, columns=PREDICTION_OUTPUT_COLUMNS)
326
+
327
+
328
+ def chart_base64_from_df(
329
+ df: pd.DataFrame,
330
+ value_col: str,
331
+ ylabel: str,
332
+ color_map: Dict[str, str],
333
+ ) -> str | None:
334
+ try:
335
+ import matplotlib.pyplot as plt
336
+ except Exception:
337
+ return None
338
+ if df.empty or value_col not in df.columns:
339
+ return None
340
+ labels = df["categorie"].astype(str).tolist()
341
+ values = pd.to_numeric(df[value_col], errors="coerce").fillna(0).tolist()
342
+ colors = [color_map.get(label, "#3b82f6") for label in labels]
343
+ fig, ax = plt.subplots(figsize=(4.5, 3.2))
344
+ ax.barh(labels, values, color=colors)
345
+ ax.invert_yaxis()
346
+ ax.set_xlabel(ylabel)
347
+ ax.tick_params(axis="y", labelsize=8)
348
+ fig.tight_layout()
349
+ buf = io.BytesIO()
350
+ fig.savefig(buf, format="png", dpi=150)
351
+ plt.close(fig)
352
+ return base64.b64encode(buf.getvalue()).decode("ascii")
353
+
354
+
355
+ def build_map_popup_html(
356
+ bureau_label: str,
357
+ table_df: pd.DataFrame,
358
+ chart_b64: str | None,
359
+ meta: str | None,
360
+ ) -> str:
361
+ title_html = f"<strong>{escape(bureau_label)}</strong>"
362
+ meta_html = f"<div style='margin:4px 0;'>{escape(meta)}</div>" if meta else ""
363
+ table_html = table_df.to_html(index=False, border=0)
364
+ img_html = ""
365
+ if chart_b64:
366
+ img_html = (
367
+ "<div style='margin-top:6px;'>"
368
+ f"<img src='data:image/png;base64,{chart_b64}' style='width:320px;height:auto;'/>"
369
+ "</div>"
370
+ )
371
+ return f"<div style='font-size:12px;'>{title_html}{meta_html}{table_html}{img_html}</div>"
372
+
373
+
374
+ def build_map_legend_html() -> str:
375
+ parts = []
376
+ for key in DISPLAY_CATEGORY_ORDER:
377
+ label = DISPLAY_CATEGORY_LABELS.get(key, key)
378
+ color = DISPLAY_CATEGORY_COLORS.get(key, "#9ca3af")
379
+ parts.append(
380
+ f"<span style='display:inline-flex;align-items:center;margin-right:10px;'>"
381
+ f"<span style='width:12px;height:12px;background:{color};display:inline-block;margin-right:6px;border:1px solid #111827;'></span>"
382
+ f"{escape(label)}</span>"
383
+ )
384
+ parts.append(
385
+ "<span style='display:inline-flex;align-items:center;margin-right:10px;'>"
386
+ "<span style='width:12px;height:12px;background:#ffffff;display:inline-block;margin-right:6px;border:1px solid #111827;'></span>"
387
+ "écart gauche/droite ≤ 10%</span>"
388
+ )
389
+ parts.append(
390
+ "<span style='display:inline-flex;align-items:center;margin-right:10px;'>"
391
+ "<span style='width:12px;height:12px;background:#9ca3af;display:inline-block;margin-right:6px;border:1px solid #111827;'></span>"
392
+ "données indisponibles</span>"
393
+ )
394
+ parts.append("<span style='font-size:12px;color:#6b7280;'>abstention non utilisée pour la couleur</span>")
395
+ return "<div style='margin-bottom:8px;'>" + " ".join(parts) + "</div>"
396
+
397
+
398
+ def build_bureau_map_html(
399
+ backend: "PredictorBackend",
400
+ target_type: str,
401
+ target_year: int,
402
+ ) -> str:
403
+ try:
404
+ import folium
405
+ except Exception:
406
+ return "<p>Folium n'est pas disponible. Installe-le via requirements.txt.</p>"
407
+
408
+ features = load_geojson_features()
409
+ if not features:
410
+ return "<p>Aucune geojson trouvée dans data/geo.</p>"
411
+
412
+ bounds = geojson_bounds(features)
413
+ if bounds is None:
414
+ return "<p>Impossible de calculer l'emprise de la carte.</p>"
415
+ (min_lat, min_lon), (max_lat, max_lon) = bounds
416
+ center = [(min_lat + max_lat) / 2, (min_lon + max_lon) / 2]
417
+ fmap = folium.Map(location=center, zoom_start=13, tiles="cartodbpositron")
418
+
419
+ available_codes = set(backend.available_bureaux())
420
+ for feature in features:
421
+ props = feature.get("properties", {})
422
+ label = props.get("name") or "Bureau"
423
+ bureau_num = extract_bureau_number(label)
424
+ if bureau_num is None:
425
+ code_bv = None
426
+ else:
427
+ code_bv = match_bureau_code(COMMUNE_CODE_SETE, bureau_num, available_codes)
428
+
429
+ fill_color = "#9ca3af"
430
+ popup_html = None
431
+ if code_bv is not None:
432
+ details, _, meta = backend.predict_bureau_details(code_bv, target_type, target_year)
433
+ if details is not None:
434
+ shares = details["shares_by_cat"]
435
+ left_share = float(shares.get("gauche_dure", 0.0) + shares.get("gauche_modere", 0.0))
436
+ right_share = float(shares.get("droite_dure", 0.0) + shares.get("droite_modere", 0.0))
437
+ if abs(left_share - right_share) <= NEUTRAL_MARGIN_SHARE:
438
+ fill_color = "#ffffff"
439
+ else:
440
+ winner = max(shares, key=shares.get)
441
+ fill_color = DISPLAY_CATEGORY_COLORS.get(winner, fill_color)
442
+
443
+ table_df = build_prediction_table_from_counts(details["counts"])
444
+ chart_b64 = chart_base64_from_df(
445
+ table_df,
446
+ value_col="nombre",
447
+ ylabel="Nombre d'electeurs",
448
+ color_map=DISPLAY_LABEL_COLORS,
449
+ )
450
+ popup_html = build_map_popup_html(str(label), table_df, chart_b64, meta)
451
+
452
+ def _style(_: dict, color=fill_color):
453
+ return {
454
+ "fillColor": color,
455
+ "color": "#111827",
456
+ "weight": 1,
457
+ "fillOpacity": 0.6,
458
+ }
459
+
460
+ geo = folium.GeoJson(feature, style_function=_style)
461
+ if popup_html:
462
+ geo.add_child(folium.Popup(popup_html, max_width=450))
463
+ geo.add_child(folium.Tooltip(str(label)))
464
+ geo.add_to(fmap)
465
+
466
+ fmap.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]])
467
+ return fmap._repr_html_()
468
+
469
+
470
+ def _project_rate(
471
+ series: pd.Series,
472
+ years: pd.Series,
473
+ target_year: int,
474
+ *,
475
+ min_points_trend: int = 3,
476
+ clamp_to_observed: bool = True,
477
+ ) -> float | None:
478
+ df = pd.DataFrame(
479
+ {
480
+ "value": pd.to_numeric(series, errors="coerce"),
481
+ "year": pd.to_numeric(years, errors="coerce"),
482
+ }
483
+ ).dropna()
484
+ if df.empty:
485
+ return None
486
+ values = df["value"].to_numpy()
487
+ years_arr = df["year"].to_numpy()
488
+ if len(set(years_arr)) >= min_points_trend and len(df) >= min_points_trend:
489
+ with warnings.catch_warnings():
490
+ warnings.simplefilter("ignore", category=NP_RANK_WARNING)
491
+ try:
492
+ slope, intercept = np.polyfit(years_arr, values, 1)
493
+ projected = slope * target_year + intercept
494
+ except Exception:
495
+ projected = values[-1]
496
+ else:
497
+ projected = values[-1]
498
+ if clamp_to_observed and len(values):
499
+ projected = min(max(projected, float(np.nanmin(values))), float(np.nanmax(values)))
500
+ return float(min(1.0, max(0.0, projected)))
501
+
502
+
503
+ def _allocate_counts(shares: np.ndarray, total: int) -> np.ndarray:
504
+ if total <= 0 or shares.size == 0:
505
+ return np.zeros_like(shares, dtype=int)
506
+ shares = np.clip(shares, 0, None)
507
+ if shares.sum() == 0:
508
+ return np.zeros_like(shares, dtype=int)
509
+ shares = shares / shares.sum()
510
+ raw = shares * total
511
+ floors = np.floor(raw)
512
+ remainder = int(total - floors.sum())
513
+ if remainder > 0:
514
+ order = np.argsort(-(raw - floors))
515
+ for idx in order[:remainder]:
516
+ floors[idx] += 1
517
+ return floors.astype(int)
518
+
519
+
520
def load_bureau_event_stats(commune_code: str) -> pd.DataFrame:
    """Load per-bureau participation stats for one commune.

    Tries several processed/interim files in priority order and keeps the
    first non-empty dataset that carries blancs/nuls information; if none
    does, the last non-empty dataset is used. Returns one row per
    (bureau, scrutin type, year, round) with turnout/blancs/nuls rates
    relative to registered voters, or an empty frame when nothing matched.
    """
    # Candidate files, from most to least preferred.
    candidates = [
        Path("data/processed/elections_blocs.parquet"),
        Path("data/processed/elections_blocs.csv"),
        Path("data/interim/elections_long.parquet"),
        Path("data/interim/elections_long.csv"),
    ]
    df = pd.DataFrame()
    best = pd.DataFrame()
    for path in candidates:
        if not path.exists():
            continue
        if path.suffix == ".parquet":
            df = pd.read_parquet(path)
        else:
            df = pd.read_csv(path, sep=";")
        if df.empty:
            continue
        # Harmonize column names: accept either French or English schemas.
        if "type_scrutin" not in df.columns and "election_type" in df.columns:
            df["type_scrutin"] = df["election_type"]
        if "annee" not in df.columns and "election_year" in df.columns:
            df["annee"] = df["election_year"]
        if "tour" not in df.columns and "round" in df.columns:
            df["tour"] = df["round"]
        df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce")
        # Coerce vote-count columns to numeric, creating missing ones as NaN.
        for col in ["inscrits", "votants", "blancs", "nuls"]:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors="coerce")
            else:
                df[col] = np.nan
        # Restrict to the requested commune, either via an explicit commune
        # column or via the commune prefix embedded in code_bv.
        if "code_commune" in df.columns:
            df["code_commune"] = df["code_commune"].astype(str)
            df = df[df["code_commune"] == str(commune_code)]
        else:
            df = df[df["code_bv"].astype(str).str.startswith(str(commune_code))]
        df = df.dropna(subset=["code_bv"])
        if df.empty:
            continue
        # Prefer the first dataset that actually records blancs/nuls.
        has_blancs = df["blancs"].notna().any() or df["nuls"].notna().any()
        if has_blancs:
            best = df
            break
    if best.empty:
        # No dataset had blancs/nuls: fall back to the last non-empty one.
        best = df
    df = best
    if df.empty:
        return df
    # One row per event; max() collapses per-candidate duplicates, since
    # these totals are repeated on every candidate row of the same event.
    group_cols = [col for col in ["code_bv", "type_scrutin", "annee", "tour", "date_scrutin"] if col in df.columns]
    agg = df.groupby(group_cols, as_index=False).agg(
        inscrits=("inscrits", "max"),
        votants=("votants", "max"),
        blancs=("blancs", "max"),
        nuls=("nuls", "max"),
    )
    if "date_scrutin" in agg.columns:
        agg = agg.sort_values("date_scrutin")
    agg["election_type"] = agg.get("type_scrutin")
    agg["election_type"] = agg["election_type"].astype("string").str.strip().str.lower()
    agg["election_year"] = pd.to_numeric(agg.get("annee"), errors="coerce")
    agg["round"] = pd.to_numeric(agg.get("tour"), errors="coerce").fillna(1).astype(int)
    # Zero registered voters would divide by zero; NaN the denominator instead.
    base_inscrits = agg["inscrits"].replace(0, np.nan)
    agg["turnout_pct"] = agg["votants"] / base_inscrits
    agg["blancs_pct"] = agg["blancs"] / base_inscrits
    agg["nuls_pct"] = agg["nuls"] / base_inscrits
    return agg[
        [
            "code_bv",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
            "inscrits",
            "votants",
            "blancs",
            "nuls",
            "turnout_pct",
            "blancs_pct",
            "nuls_pct",
        ]
    ]
600
+
601
+
602
def load_commune_event_stats(commune_code: str) -> pd.DataFrame:
    """Load commune-level participation stats (inscrits/votants/blancs/nuls).

    Reads the first available commune_event_stats file (parquet preferred),
    filters it to ``commune_code`` and derives turnout/blancs/nuls rates when
    they are not already present. Returns an empty frame when no file exists,
    when the file has no ``code_commune`` column, or when the commune is absent.
    """
    candidates = [
        Path("data/processed/commune_event_stats.parquet"),
        Path("data/processed/commune_event_stats.csv"),
    ]
    df = pd.DataFrame()
    for path in candidates:
        if not path.exists():
            continue
        if path.suffix == ".parquet":
            df = pd.read_parquet(path)
        else:
            df = pd.read_csv(path, sep=";")
        if not df.empty:
            break
    if df.empty:
        return df
    # Harmonize column names: accept either French or English schemas.
    if "type_scrutin" not in df.columns and "election_type" in df.columns:
        df["type_scrutin"] = df["election_type"]
    if "annee" not in df.columns and "election_year" in df.columns:
        df["annee"] = df["election_year"]
    if "tour" not in df.columns and "round" in df.columns:
        df["tour"] = df["round"]
    df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce")
    for col in ["inscrits", "votants", "blancs", "nuls"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        else:
            df[col] = np.nan
    if "code_commune" in df.columns:
        df["code_commune"] = df["code_commune"].astype(str)
        df = df[df["code_commune"] == str(commune_code)]
    else:
        # Without a commune column this dataset cannot be scoped; give up.
        return pd.DataFrame()
    if df.empty:
        return df
    # Zero registered voters would divide by zero; NaN the denominator instead.
    base_inscrits = df["inscrits"].replace(0, np.nan)
    if "turnout_pct" not in df.columns:
        df["turnout_pct"] = df["votants"] / base_inscrits
    if "blancs_pct" not in df.columns:
        df["blancs_pct"] = df["blancs"] / base_inscrits
    if "nuls_pct" not in df.columns:
        df["nuls_pct"] = df["nuls"] / base_inscrits
    df["election_type"] = df["type_scrutin"].astype("string").str.strip().str.lower()
    df["election_year"] = pd.to_numeric(df.get("annee"), errors="coerce")
    df["round"] = pd.to_numeric(df.get("tour"), errors="coerce").fillna(1).astype(int)
    return df[
        [
            "code_commune",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
            "inscrits",
            "votants",
            "blancs",
            "nuls",
            "turnout_pct",
            "blancs_pct",
            "nuls_pct",
        ]
    ]
664
+
665
+
666
def format_backend_label(backend_kind: str) -> str:
    """Return the human-readable label for a data backend kind."""
    if backend_kind == "postgres":
        return "PostgreSQL"
    return "fichiers locaux"
668
+
669
+
670
def format_election_type_label(election_type: str) -> str:
    """Display label for an election type, falling back to a title-cased name."""
    mapped = ELECTION_TYPE_LABELS.get(election_type)
    if mapped:
        return mapped
    # Unknown type: derive a readable label from the raw identifier.
    return str(election_type).replace("_", " ").title()
675
+
676
+
677
def format_election_label(
    election_type: str,
    election_year: int,
    round_num: int,
    date_scrutin: pd.Timestamp | None = None,
) -> str:
    """Build a display label such as "Municipales 2026 - Tour 1 (2026-03-15)".

    The ISO date suffix is appended only when ``date_scrutin`` is provided
    and is not NaT/NaN.
    """
    label = f"{format_election_type_label(election_type)} {election_year} - Tour {round_num}"
    if date_scrutin is not None and not pd.isna(date_scrutin):
        iso_date = pd.to_datetime(date_scrutin).date().isoformat()
        label = f"{label} ({iso_date})"
    return label
688
+
689
+
690
def format_election_key(election_type: str, election_year: int, round_num: int) -> str:
    """Serialize an election identity into a single separator-joined key."""
    parts = [str(election_type), str(election_year), str(round_num)]
    return ELECTION_KEY_SEP.join(parts)
692
+
693
+
694
def parse_election_key(key: str) -> Tuple[str, int, int]:
    """Inverse of ``format_election_key``.

    Raises
    ------
    ValueError
        If ``key`` does not contain exactly three separator-delimited parts.
    """
    pieces = key.split(ELECTION_KEY_SEP)
    if len(pieces) != 3:
        raise ValueError(f"Clé d'élection invalide: {key!r}")
    election_type, year_str, round_str = pieces
    return election_type, int(year_str), int(round_str)
699
+
700
+
701
+ def format_bureau_label(code_bv: str, bureau_label: str | None) -> str:
702
+ code = str(code_bv)
703
+ suffix = code.split("-")[-1] if "-" in code else code
704
+ if bureau_label is not None and not pd.isna(bureau_label):
705
+ label = str(bureau_label).strip()
706
+ if label and label != code:
707
+ return f"{label} ({code})"
708
+ return f"Bureau {suffix} ({code})"
709
+
710
+
711
def build_bureau_choices(history: pd.DataFrame) -> list[tuple[str, str]]:
    """Build (display label, code_bv) dropdown choices from a history frame."""
    if history.empty:
        return []
    if "bureau_label" not in history.columns:
        # No label column: derive labels from the codes alone.
        codes = sorted(history["code_bv"].dropna().unique().tolist())
        return [(format_bureau_label(code, None), code) for code in codes]
    # Keep one label per bureau (first seen after sorting by code).
    label_table = (
        history[["code_bv", "bureau_label"]]
        .dropna(subset=["code_bv"])
        .drop_duplicates()
        .sort_values("code_bv")
        .groupby("code_bv", as_index=False)["bureau_label"]
        .first()
    )
    choices: list[tuple[str, str]] = []
    for entry in label_table.itertuples(index=False):
        choices.append((format_bureau_label(entry.code_bv, entry.bureau_label), entry.code_bv))
    return choices
729
+
730
+
731
def build_history_choices(history: pd.DataFrame) -> list[tuple[str, str]]:
    """Build (display label, election key) dropdown choices from history.

    One choice per distinct (type, year, round) event, keeping the earliest
    known scrutin date for display, sorted by year then type then round.
    """
    if history.empty:
        return []
    events = (
        history[["election_type", "election_year", "round", "date_scrutin"]]
        .dropna(subset=["election_type", "election_year", "round"])
        .drop_duplicates()
        # Collapse duplicate events that differ only by date; keep min date.
        .groupby(["election_type", "election_year", "round"], as_index=False)
        .agg(date_scrutin=("date_scrutin", "min"))
        .sort_values(["election_year", "election_type", "round"])
    )
    return [
        (
            format_election_label(
                row.election_type,
                int(row.election_year),
                int(row.round),
                row.date_scrutin,
            ),
            format_election_key(row.election_type, int(row.election_year), int(row.round)),
        )
        for row in events.itertuples(index=False)
    ]
754
+
755
+
756
def clean_history_frame(history: pd.DataFrame) -> pd.DataFrame:
    """Normalize a raw history frame into a clean, typed dataset.

    - string-normalizes identifiers (category/type lowercased, stripped)
    - coerces year/round/date and clips the rate columns to [0, 1]
    - drops rows missing any key field
    - keeps only rows whose category is a known candidate category
    """
    if history.empty:
        return history
    clean = history.copy()
    clean["code_bv"] = clean["code_bv"].astype("string").str.strip()
    clean["election_type"] = clean["election_type"].astype("string").str.strip().str.lower()
    clean["category"] = clean["category"].astype("string").str.strip().str.lower()
    if "bureau_label" in clean.columns:
        clean["bureau_label"] = clean["bureau_label"].astype("string").str.strip()
    clean["election_year"] = pd.to_numeric(clean["election_year"], errors="coerce")
    # Missing round defaults to the first round.
    clean["round"] = pd.to_numeric(clean["round"], errors="coerce").fillna(1)
    clean["date_scrutin"] = pd.to_datetime(clean["date_scrutin"], errors="coerce")
    # Shares and turnout are proportions; clip out-of-range values.
    for col in ["share", "share_nat", "turnout_pct"]:
        if col in clean.columns:
            clean[col] = pd.to_numeric(clean[col], errors="coerce").clip(lower=0, upper=1)
    clean = clean.dropna(subset=["code_bv", "election_type", "election_year", "round", "category"])
    clean["election_year"] = clean["election_year"].astype(int)
    clean["round"] = clean["round"].astype(int)
    clean = clean[clean["category"].isin(CANDIDATE_CATEGORIES)]
    return clean
776
+
777
+
778
def prepare_history_table(history_slice: pd.DataFrame) -> pd.DataFrame:
    """Aggregate a history slice into the display table (one row per category).

    Every known category appears exactly once, in canonical order, with a
    zero share when it is absent from the slice; scores are percentages
    rounded to one decimal.
    """
    if history_slice.empty:
        return pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
    grouped = history_slice.groupby("category", as_index=False).agg(share=("share", "sum"))
    # Left-merge onto the canonical category order so missing ones show as 0.
    clean = pd.DataFrame({"category": ordered_categories()}).merge(grouped, on="category", how="left")
    clean["share"] = pd.to_numeric(clean["share"], errors="coerce").fillna(0).clip(lower=0, upper=1)
    clean["score_%"] = (clean["share"] * 100).round(1)
    clean["categorie"] = clean["category"].map(DISPLAY_CATEGORY_LABELS).fillna(clean["category"])
    return clean[HISTORY_OUTPUT_COLUMNS]
787
+
788
+
789
def format_history_meta(history_slice: pd.DataFrame) -> str:
    """Summarize a history slice as "Date du scrutin : … | Participation : …".

    Each part is included only when the corresponding data is available;
    an empty slice yields an empty string.
    """
    if history_slice.empty:
        return ""
    pieces: list[str] = []
    known_dates = history_slice["date_scrutin"].dropna()
    if not known_dates.empty:
        first_date = pd.to_datetime(known_dates.iloc[0]).date().isoformat()
        pieces.append(f"Date du scrutin : {first_date}")
    turnout = pd.to_numeric(history_slice["turnout_pct"], errors="coerce").dropna()
    if not turnout.empty:
        pieces.append(f"Participation : {turnout.iloc[0] * 100:.1f}%")
    return " | ".join(pieces)
801
+
802
+
803
+ def _code_bv_full(commune_code: str, bureau_code: str) -> str:
804
+ bureau_code = str(bureau_code).zfill(4)
805
+ return f"{commune_code}-{bureau_code}"
806
+
807
+
808
def load_history_from_db(commune_code: str) -> pd.DataFrame:
    """Load per-bureau historical results for a commune from PostgreSQL.

    Joins local results with bureau/commune/election/category metadata and
    the matching national shares; percentage columns are converted to
    fractions in [0, 1].

    Raises
    ------
    RuntimeError
        When the query returns no rows for ``commune_code`` (callers use
        this to fall back to the file-based loader).
    """
    engine = get_engine()
    query = sa.text(
        """
        select cm.insee_code as commune_code,
               b.bureau_code,
               b.bureau_label,
               e.election_type,
               e.election_year,
               coalesce(e.round, 1) as round,
               e.date as date_scrutin,
               c.name as category,
               rl.share_pct,
               rl.turnout_pct,
               rn.share_pct as share_nat
        from results_local rl
        join bureaux b on rl.bureau_id = b.id
        join communes cm on b.commune_id = cm.id
        join elections e on rl.election_id = e.id
        join categories c on rl.category_id = c.id
        left join results_national rn on rn.election_id = e.id and rn.category_id = rl.category_id
        where cm.insee_code = :commune
        """
    )
    df = pd.read_sql(query, engine, params={"commune": commune_code})
    if df.empty:
        raise RuntimeError("Aucune donnée dans la base pour la commune demandée.")
    # Build the canonical "<commune>-<bureau>" identifier used everywhere else.
    df["code_bv"] = df.apply(lambda r: _code_bv_full(r["commune_code"], r["bureau_code"]), axis=1)
    df["date_scrutin"] = pd.to_datetime(df["date_scrutin"])
    # DB stores percentages (0-100); the app works in fractions (0-1).
    df["share"] = pd.to_numeric(df["share_pct"], errors="coerce") / 100
    df["share_nat"] = pd.to_numeric(df["share_nat"], errors="coerce") / 100
    df["turnout_pct"] = pd.to_numeric(df["turnout_pct"], errors="coerce") / 100
    df["election_year"] = pd.to_numeric(df["election_year"], errors="coerce")
    df["round"] = pd.to_numeric(df["round"], errors="coerce").fillna(1).astype(int)
    return df[
        [
            "commune_code",
            "code_bv",
            "bureau_label",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
            "category",
            "share",
            "share_nat",
            "turnout_pct",
        ]
    ]
857
+
858
+
859
def load_history_from_files(commune_code: str) -> pd.DataFrame:
    """File-based fallback for ``load_history_from_db``.

    Rebuilds the same per-bureau history frame from the interim long-format
    dataset and the candidate-to-bloc mapping, merging in a national
    reference share per (type, year, round, category).

    Raises
    ------
    RuntimeError
        When the aggregated local dataset has no ``share`` column.
    """
    elections_long_all = load_elections_long(
        Path("data/interim/elections_long.parquet"),
        commune_code=commune_code,
    )
    mapping = load_mapping(Path("data/mapping_candidats_blocs.csv"))
    expanded_all = expand_by_category(elections_long_all, mapping)
    local_all = aggregate_by_event(expanded_all)
    nat = compute_national_reference(local_all)

    local = local_all[local_all["commune_code"] == commune_code].copy()
    local = local.merge(nat, on=["election_type", "election_year", "round", "category"], how="left")
    # Columns already in aggregate_by_event/compute_national_reference
    if "share" not in local.columns:
        raise RuntimeError("Colonne share absente du dataset local (fallback fichiers).")
    # Files carry no bureau labels; keep the column for schema parity with DB.
    local["bureau_label"] = None
    local["share_nat"] = local.get("share_nat")
    local["turnout_pct"] = local.get("turnout_pct")
    return local.rename(
        columns={
            "annee": "election_year",
            "tour": "round",
        }
    )[
        [
            "commune_code",
            "code_bv",
            "bureau_label",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
            "category",
            "share",
            "share_nat",
            "turnout_pct",
        ]
    ]
897
+
898
+
899
def references_from_history(history: pd.DataFrame, target_year: int) -> Dict[str, Dict[Tuple[str, str], float]]:
    """Extract per-(bureau, category) reference shares from past elections.

    Returns
    -------
    dict
        ``{"leg": ..., "mun2020": ...}`` where each value maps
        ``(code_bv, category)`` to the most recent observed share for,
        respectively, legislative elections and the 2020 municipal election,
        restricted to years strictly before ``target_year``.
    """
    past = history[history["election_year"] < target_year].copy()

    def _latest_shares(frame: pd.DataFrame) -> Dict[Tuple[str, str], float]:
        # Sort chronologically so .last() picks the most recent share.
        return (
            frame.sort_values("date_scrutin")
            .groupby(["code_bv", "category"])["share"]
            .last()
            .to_dict()
        )

    leg_frame = past[past["election_type"] == "legislatives"]
    mun_frame = past[(past["election_type"] == "municipales") & (past["election_year"] == 2020)]
    return {"leg": _latest_shares(leg_frame), "mun2020": _latest_shares(mun_frame)}
914
+
915
+
916
def build_features_from_history(history: pd.DataFrame, target_type: str, target_year: int) -> pd.DataFrame:
    """Build the model's per-bureau feature rows for a target election.

    Uses only history strictly before ``target_year``. For every bureau and
    candidate category it derives lag features (last share, last deviation
    to national, last same-type values, one-step swing) plus last observed
    turnout, and emits one synthetic row per bureau dated Jan 1 of the
    target year. Returns an empty frame when there is no usable history.
    """
    hist = history[history["election_year"] < target_year].copy()
    if hist.empty:
        return pd.DataFrame()
    # Chronological order so every .last() below means "most recent".
    hist = hist.sort_values("date_scrutin")
    hist["dev_to_nat"] = hist["share"] - hist["share_nat"]

    last_any_share = hist.groupby(["code_bv", "category"])["share"].last()
    last_any_dev = hist.groupby(["code_bv", "category"])["dev_to_nat"].last()
    # Same-type lags: restricted to past elections of the target type.
    last_type_share = (
        hist[hist["election_type"] == target_type]
        .groupby(["code_bv", "category"])["share"]
        .last()
    )
    last_type_dev = (
        hist[hist["election_type"] == target_type]
        .groupby(["code_bv", "category"])["dev_to_nat"]
        .last()
    )
    # Swing = change between the two most recent observations (NaN if < 2).
    swing_any = (
        hist.groupby(["code_bv", "category"])["share"]
        .apply(lambda s: s.iloc[-1] - s.iloc[-2] if len(s) >= 2 else np.nan)
        .rename("swing_any")
    )
    turnout_any = hist.groupby("code_bv")["turnout_pct"].last()
    turnout_type = (
        hist[hist["election_type"] == target_type]
        .groupby("code_bv")["turnout_pct"]
        .last()
    )

    bureaux = sorted(hist["code_bv"].dropna().unique())
    records = []
    for code_bv in bureaux:
        record = {
            # code_bv is "<commune>-<bureau>"; the prefix is the commune code.
            "commune_code": str(code_bv).split("-")[0],
            "code_bv": code_bv,
            "election_type": target_type,
            "election_year": target_year,
            "round": 1,
            # Placeholder date for the future event (actual date unknown).
            "date_scrutin": f"{target_year}-01-01",
            "prev_turnout_any_lag1": turnout_any.get(code_bv, np.nan),
            "prev_turnout_same_type_lag1": turnout_type.get(code_bv, np.nan),
        }
        for cat in CANDIDATE_CATEGORIES:
            record[f"prev_share_any_lag1_{cat}"] = last_any_share.get((code_bv, cat), np.nan)
            record[f"prev_share_type_lag1_{cat}"] = last_type_share.get((code_bv, cat), np.nan)
            record[f"prev_dev_to_national_any_lag1_{cat}"] = last_any_dev.get((code_bv, cat), np.nan)
            record[f"prev_dev_to_national_type_lag1_{cat}"] = last_type_dev.get((code_bv, cat), np.nan)
            record[f"swing_any_{cat}"] = swing_any.get((code_bv, cat), np.nan)
        records.append(record)
    return pd.DataFrame.from_records(records)
968
+
969
+
970
def load_model() -> Path:
    """Locate the trained model file to load, in priority order.

    1. The model named in ``models/best_model.json`` (if present and valid).
    2. ``hist_gradient_boosting.joblib``.
    3. The first ``*.joblib`` file (alphabetically) in the models directory.

    Raises
    ------
    FileNotFoundError
        When no ``.joblib`` file exists at all.
    """
    best_path = MODEL_DIR / "best_model.json"
    if best_path.exists():
        try:
            payload = json.loads(best_path.read_text())
            name = payload.get("name")
            if name:
                candidate = MODEL_DIR / f"{name}.joblib"
                if candidate.exists():
                    return candidate
        except Exception:
            # Corrupt/unreadable best_model.json: fall through to defaults.
            pass
    if (MODEL_DIR / "hist_gradient_boosting.joblib").exists():
        return MODEL_DIR / "hist_gradient_boosting.joblib"
    joblibs = sorted(MODEL_DIR.glob("*.joblib"))
    if not joblibs:
        raise FileNotFoundError("Aucun modèle trouvé dans models/. Lancez src/model/train.py.")
    return joblibs[0]
988
+
989
+
990
def load_feature_columns(path: Path, df: pd.DataFrame) -> list[str]:
    """Load the model's feature-column list, or derive it from ``df``.

    When ``path`` exists it is read as a JSON array; otherwise every column
    of ``df`` except the identifier/event-key columns is used.
    """
    if path.exists():
        return json.loads(path.read_text())
    id_cols = {"commune_code", "code_bv", "election_type", "election_year", "round", "date_scrutin"}
    return [col for col in df.columns if col not in id_cols]
995
+
996
+
997
def format_delta(value) -> str:
    """Format a numeric delta with an explicit leading "+" for non-negatives.

    Returns "N/A" for None or NaN; otherwise the value rounded to 1 decimal.
    """
    if value is None:
        return "N/A"
    if isinstance(value, float) and np.isnan(value):
        return "N/A"
    prefix = "+" if value >= 0 else ""
    return f"{prefix}{round(value, 1)}"
1002
+
1003
+
1004
class PredictorBackend:
    """Prediction backend for one commune.

    Loads the election history from PostgreSQL when available (falling back
    to local files), pre-computes participation statistics and default
    turnout/blancs/nuls rates, and loads the trained model used to predict
    per-bureau vote distributions.
    """

    def __init__(self, commune_code: str = COMMUNE_CODE_SETE):
        """Initialize the backend for ``commune_code`` (DB first, files fallback)."""
        self.commune_code = commune_code
        self.backend = "local"
        try:
            self.history = load_history_from_db(commune_code)
            self.backend = "postgres"
            LOGGER.info("Backend PostgreSQL chargé (%s lignes)", len(self.history))
        except Exception as exc:
            # Any DB failure (connection, empty result) falls back to files.
            LOGGER.warning("PostgreSQL indisponible (%s) -> fallback fichiers.", exc)
            self.history = load_history_from_files(commune_code)
            self.backend = "files"
            LOGGER.info("Backend fichiers chargé (%s lignes)", len(self.history))
        self.history = clean_history_frame(self.history)
        self.event_stats = load_bureau_event_stats(commune_code)
        self.commune_stats = load_commune_event_stats(commune_code)
        # Median fallback rates, overall and per election type.
        self.default_rates = {}
        self.default_rates_by_type: dict[str, dict[str, float]] = {}
        # Prefer commune-level stats; bureau-level stats otherwise.
        stats = self.commune_stats if not self.commune_stats.empty else self.event_stats
        if not stats.empty:
            # Rates are calibrated on first rounds when available.
            if "round" in stats.columns:
                round1 = stats[stats["round"] == 1]
                if not round1.empty:
                    stats = round1
            self.default_rates = {
                "turnout_pct": float(stats["turnout_pct"].median(skipna=True)),
                "blancs_pct": float(stats["blancs_pct"].median(skipna=True)),
                "nuls_pct": float(stats["nuls_pct"].median(skipna=True)),
            }
            if "election_type" in stats.columns:
                for etype, group in stats.groupby("election_type"):
                    self.default_rates_by_type[str(etype)] = {
                        "turnout_pct": float(group["turnout_pct"].median(skipna=True)),
                        "blancs_pct": float(group["blancs_pct"].median(skipna=True)),
                        "nuls_pct": float(group["nuls_pct"].median(skipna=True)),
                    }
        self.model_path = load_model()
        self.model = joblib.load(self.model_path)
        # feature cache per target
        self.refs_cache: Dict[Tuple[str, int], Dict[str, Dict[Tuple[str, str], float]]] = {}

    def available_bureaux(self) -> list[str]:
        """Return the sorted list of bureau codes present in the history."""
        return sorted(self.history["code_bv"].dropna().unique().tolist())

    def available_targets(self) -> list[Tuple[str, int]]:
        """Return (type, year) targets: every historical event plus defaults."""
        existing = set()
        for row in self.history.itertuples(index=False):
            try:
                year = int(row.election_year)  # type: ignore
            except Exception:
                continue
            existing.add((row.election_type, year))
        for t in DEFAULT_TARGETS:
            existing.add(t)
        # Sorted chronologically, then alphabetically within a year.
        return sorted(existing, key=lambda x: (x[1], x[0]))

    def _get_features_and_refs(self, target_type: str, target_year: int) -> Tuple[pd.DataFrame, Dict[str, Dict[Tuple[str, str], float]]]:
        """Return (features, references) for a target, memoized in FEATURE_CACHE."""
        key = (target_type, target_year)
        if key not in FEATURE_CACHE:
            feature_df = build_features_from_history(self.history, target_type, target_year)
            refs = references_from_history(self.history, target_year)
            FEATURE_CACHE[key] = (feature_df, refs)
        return FEATURE_CACHE[key]

    def predict_bureau_details(
        self,
        code_bv: str,
        target_type: str,
        target_year: int,
        inscrits_override: float | None = None,
    ) -> Tuple[Dict[str, object] | None, str, str]:
        """Predict the vote distribution for one bureau.

        Returns ``(details, backend_label, meta)`` where ``details`` holds
        predicted shares, integer counts (including blancs/nuls/abstention)
        and totals — or ``(None, <error message>, "")`` when prediction is
        impossible (no features, unknown bureau, no registered-voter count).
        """
        feature_df, _ = self._get_features_and_refs(target_type, target_year)
        if feature_df.empty:
            return None, "Données insuffisantes", ""
        row = feature_df[feature_df["code_bv"] == code_bv].copy()
        if row.empty:
            return None, "Bureau non trouvé dans l'historique.", ""

        feature_cols = load_feature_columns(FEATURE_COLS_PATH, feature_df)
        # Columns the model expects but the features lack are filled with NaN.
        missing = [c for c in feature_cols if c not in row.columns]
        for col in missing:
            row[col] = np.nan
        preds = self.model.predict(row[feature_cols])
        # Clip to [0, 1] and renormalize so shares sum to 1 per row.
        preds = np.clip(preds, 0, 1)
        sums = preds.sum(axis=1, keepdims=True)
        sums[sums == 0] = 1
        preds = preds / sums
        preds_share = preds.flatten()

        # Model output order matches CANDIDATE_CATEGORIES.
        preds_by_cat = {cat: float(preds_share[idx]) for idx, cat in enumerate(CANDIDATE_CATEGORIES)}
        ordered = ordered_categories()
        share_vec = np.array([preds_by_cat.get(cat, 0.0) for cat in ordered], dtype=float)

        # Resolve the registered-voter count: override first, else the most
        # recent observed value for this bureau.
        stats = self.event_stats[self.event_stats["code_bv"] == code_bv].sort_values("date_scrutin")
        inscrits_used = None
        if inscrits_override is not None:
            try:
                value = float(inscrits_override)
                if value > 0:
                    inscrits_used = value
            except (TypeError, ValueError):
                inscrits_used = None
        if inscrits_used is None and not stats.empty:
            serie = pd.to_numeric(stats["inscrits"], errors="coerce").dropna()
            if not serie.empty:
                inscrits_used = float(serie.iloc[-1])
        if inscrits_used is None:
            return None, "Inscrits indisponibles pour ce bureau.", ""

        def pick_rate(col: str) -> float:
            # Resolve a projected rate for `col`, trying in order:
            # bureau-level same-type stats, commune-level stats, then any
            # bureau-level stats, falling back to per-type/global medians.
            default = self.default_rates.get(col, 0.0)
            default = 0.0 if default is None or np.isnan(default) else float(default)
            type_default = self.default_rates_by_type.get(target_type, {}).get(col)
            if type_default is None or np.isnan(type_default):
                type_default = default

            bureau_scoped = self.event_stats
            # Rates are projected from first rounds when available.
            if not bureau_scoped.empty and "round" in bureau_scoped.columns:
                round1 = bureau_scoped[bureau_scoped["round"] == 1]
                if not round1.empty:
                    bureau_scoped = round1

            series = None
            years = None
            if (
                not bureau_scoped.empty
                and col in bureau_scoped.columns
                and "election_type" in bureau_scoped.columns
            ):
                if target_type in bureau_scoped["election_type"].values:
                    mask = bureau_scoped["election_type"] == target_type
                    series = bureau_scoped.loc[mask, col]
                    years = bureau_scoped.loc[mask, "election_year"]

            if series is None and not self.commune_stats.empty and col in self.commune_stats.columns:
                commune_scoped = self.commune_stats
                if "round" in commune_scoped.columns:
                    round1 = commune_scoped[commune_scoped["round"] == 1]
                    if not round1.empty:
                        commune_scoped = round1
                if target_type in commune_scoped["election_type"].values:
                    mask = commune_scoped["election_type"] == target_type
                    series = commune_scoped.loc[mask, col]
                    years = commune_scoped.loc[mask, "election_year"]
                else:
                    series = commune_scoped[col]
                    years = commune_scoped["election_year"]

            if series is None:
                if bureau_scoped.empty or col not in bureau_scoped.columns:
                    return type_default
                series = bureau_scoped[col]
                years = bureau_scoped["election_year"]

            rate = _project_rate(series, years, target_year)
            if rate is None or np.isnan(rate):
                return type_default
            return float(rate)

        turnout_rate = pick_rate("turnout_pct")
        blancs_rate = pick_rate("blancs_pct")
        nuls_rate = pick_rate("nuls_pct")
        # Blancs + nuls cannot exceed turnout; rescale proportionally if so.
        if blancs_rate + nuls_rate > turnout_rate and (blancs_rate + nuls_rate) > 0:
            scale = turnout_rate / (blancs_rate + nuls_rate)
            blancs_rate *= scale
            nuls_rate *= scale

        inscrits_total = int(round(inscrits_used))
        votants_total = int(round(inscrits_total * turnout_rate))
        blancs_total = int(round(inscrits_total * blancs_rate))
        nuls_total = int(round(inscrits_total * nuls_rate))
        # Re-apply the constraint on integer counts (rounding can break it).
        if blancs_total + nuls_total > votants_total and (blancs_total + nuls_total) > 0:
            scale = votants_total / (blancs_total + nuls_total)
            blancs_total = int(round(blancs_total * scale))
            nuls_total = int(round(nuls_total * scale))
        exprimes_total = max(0, votants_total - blancs_total - nuls_total)
        abstention_total = max(0, inscrits_total - votants_total)

        # Distribute the expressed votes across categories by predicted share.
        bloc_counts = _allocate_counts(share_vec, exprimes_total)
        counts_by_cat = {cat: int(count) for cat, count in zip(ordered, bloc_counts)}
        counts_by_cat.update(
            {
                "blancs": int(blancs_total),
                "nuls": int(nuls_total),
                "abstention": int(abstention_total),
            }
        )
        backend_label = format_backend_label(self.backend)
        meta = (
            f"Inscrits utilisés : {inscrits_total} | Votants : {votants_total} | "
            f"Blancs : {blancs_total} | Nuls : {nuls_total} | Abstentions : {abstention_total}"
        )
        details = {
            "shares_by_cat": preds_by_cat,
            "share_vec": share_vec,
            "ordered": ordered,
            "counts": counts_by_cat,
            "totals": {
                "inscrits": inscrits_total,
                "votants": votants_total,
                "blancs": blancs_total,
                "nuls": nuls_total,
                "abstention": abstention_total,
                "exprimes": exprimes_total,
            },
        }
        return details, backend_label, meta

    def predict_bureau(
        self,
        code_bv: str,
        target_type: str,
        target_year: int,
        inscrits_override: float | None = None,
    ) -> Tuple[pd.DataFrame, str, str]:
        """Tabular wrapper around ``predict_bureau_details``.

        Returns a (categorie, nombre) DataFrame including blancs, nuls and
        abstention rows, plus the backend label and the meta string; an
        empty DataFrame signals a prediction failure.
        """
        details, backend_label, meta = self.predict_bureau_details(
            code_bv,
            target_type,
            target_year,
            inscrits_override,
        )
        if details is None:
            return pd.DataFrame(), backend_label, ""
        counts_by_cat = details["counts"]
        ordered = details["ordered"]
        rows = []
        for cat in ordered:
            rows.append(
                {
                    "categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat),
                    "nombre": int(counts_by_cat.get(cat, 0)),
                }
            )
        for extra in ["blancs", "nuls", "abstention"]:
            rows.append(
                {
                    "categorie": DISPLAY_CATEGORY_LABELS[extra],
                    "nombre": int(counts_by_cat.get(extra, 0)),
                }
            )
        return pd.DataFrame(rows), backend_label, meta
1245
+
1246
+
1247
+ def build_bar_chart(
1248
+ df: pd.DataFrame,
1249
+ value_col: str,
1250
+ *,
1251
+ color: str = "#3b82f6",
1252
+ color_map: Dict[str, str] | None = None,
1253
+ category_col: str = "categorie",
1254
+ ylabel: str = "Score (%)",
1255
+ ):
1256
+ try:
1257
+ import matplotlib.pyplot as plt
1258
+ except Exception:
1259
+ return None
1260
+ if df.empty or value_col not in df.columns:
1261
+ return None
1262
+ plt.figure(figsize=(6, 3))
1263
+ labels = df[category_col].astype(str).tolist() if category_col in df.columns else []
1264
+ if color_map:
1265
+ colors = [color_map.get(label, color) for label in labels]
1266
+ else:
1267
+ colors = color
1268
+ plt.bar(labels, df[value_col], color=colors)
1269
+ plt.xticks(rotation=30, ha="right")
1270
+ plt.ylabel(ylabel)
1271
+ plt.tight_layout()
1272
+ return plt
1273
+
1274
+
1275
+ def create_interface() -> gr.Blocks:
1276
+ backend = PredictorBackend()
1277
+ bureau_choices = build_bureau_choices(backend.history)
1278
+ bureau_labels = [label for label, _ in bureau_choices]
1279
+ bureau_map = {label: value for label, value in bureau_choices}
1280
+ bureau_label_by_code = {value: label for label, value in bureau_choices}
1281
+ targets = backend.available_targets()
1282
+ target_labels = [f"{t} {y}" for t, y in targets]
1283
+ history_choices = build_history_choices(backend.history)
1284
+ history_labels = [label for label, _ in history_choices]
1285
+ history_map = {label: value for label, value in history_choices}
1286
+ if ("municipales", 2026) in targets:
1287
+ default_target = "municipales 2026"
1288
+ elif targets:
1289
+ default_target = f"{targets[-1][0]} {targets[-1][1]}"
1290
+ else:
1291
+ default_target = "municipales 2026"
1292
+ default_bv = bureau_labels[0] if bureau_labels else None
1293
+ default_history = history_labels[-1] if history_labels else None
1294
+ backend_label = format_backend_label(backend.backend)
1295
+ residual_payload = load_residual_intervals()
1296
+ residuals = residual_payload.get("residuals", {}) if isinstance(residual_payload, dict) else {}
1297
+ residual_model = residual_payload.get("model", "inconnu") if isinstance(residual_payload, dict) else "inconnu"
1298
+ interval_choices = list(INTERVAL_BANDS.keys()) or ["80% (p10-p90)"]
1299
+ interval_default = interval_choices[0]
1300
+ bloc_labels = [DISPLAY_CATEGORY_LABELS.get(cat, cat) for cat in ordered_categories()]
1301
+
1302
+ with gr.Blocks(title="Prévision Municipales — Ville de Sète") as demo:
1303
+ gr.Markdown(
1304
+ """
1305
+ # Prévision Municipales — Ville de Sète
1306
+ Choisissez un bureau de vote et une élection cible.
1307
+ Le modèle estime un volume par catégorie politique, ainsi que les abstentions, blancs et nuls.
1308
+ """
1309
+ )
1310
+ with gr.Tabs():
1311
+ with gr.Tab("Prévisions"):
1312
+ with gr.Row():
1313
+ bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
1314
+ target_dd = gr.Dropdown(choices=target_labels, value=default_target, label="Élection cible (type année)")
1315
+ inscrits_in = gr.Number(value=None, label="Inscrits (optionnel)", precision=0)
1316
+ predict_btn = gr.Button("Prédire")
1317
+ source_box = gr.Markdown(value=f"Source des données : {backend_label}")
1318
+ output_df = gr.Dataframe(
1319
+ headers=PREDICTION_OUTPUT_COLUMNS,
1320
+ label="Prédictions (nombres)",
1321
+ )
1322
+ chart = gr.Plot()
1323
+
1324
+ with gr.Tab("Historique"):
1325
+ gr.Markdown(
1326
+ """
1327
+ Consultation des résultats passés (sans machine learning).
1328
+ Sélectionnez un bureau et une élection pour afficher l'histogramme des parts par tendance politique.
1329
+ """
1330
+ )
1331
+ with gr.Row():
1332
+ history_bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
1333
+ history_election_dd = gr.Dropdown(
1334
+ choices=history_labels,
1335
+ value=default_history,
1336
+ label="Élection (type année tour)",
1337
+ )
1338
+ history_btn = gr.Button("Afficher l'historique")
1339
+ history_source = gr.Markdown(value=f"Source des données : {backend_label}")
1340
+ history_df = gr.Dataframe(headers=HISTORY_OUTPUT_COLUMNS, label="Résultats historiques")
1341
+ history_chart = gr.Plot()
1342
+ history_meta = gr.Markdown()
1343
+
1344
+ with gr.Tab("Carte"):
1345
+ gr.Markdown(
1346
+ """
1347
+ Carte des bureaux de vote de Sète.
1348
+ Cliquez sur un polygone pour afficher la prédiction (table + graphique).
1349
+ """
1350
+ )
1351
+ map_legend = gr.HTML(value=build_map_legend_html())
1352
+ with gr.Row():
1353
+ map_target_dd = gr.Dropdown(
1354
+ choices=target_labels,
1355
+ value=default_target,
1356
+ label="Élection cible (type année)",
1357
+ )
1358
+ map_btn = gr.Button("Afficher la carte")
1359
+ map_html = gr.HTML(value="<p>Cliquez sur 'Afficher la carte' pour charger la carte.</p>")
1360
+
1361
+ with gr.Tab("Stratégie"):
1362
+ gr.Markdown(
1363
+ """
1364
+ Analyse stratégique par bureau : intervalles d'incertitude issus des résidus CV,
1365
+ puis simulateur de transferts pour estimer des bascules potentielles.
1366
+ """
1367
+ )
1368
+ with gr.Row():
1369
+ strategy_bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
1370
+ strategy_target_dd = gr.Dropdown(
1371
+ choices=target_labels,
1372
+ value=default_target,
1373
+ label="Élection cible (type année)",
1374
+ )
1375
+ strategy_inscrits_in = gr.Number(value=None, label="Inscrits (optionnel)", precision=0)
1376
+ interval_dd = gr.Dropdown(
1377
+ choices=interval_choices,
1378
+ value=interval_default,
1379
+ label="Intervalle CV",
1380
+ )
1381
+ strategy_btn = gr.Button("Analyser l'incertitude")
1382
+ interval_source = gr.Markdown(
1383
+ value=(
1384
+ f"Intervalle CV basé sur le modèle : {residual_model}"
1385
+ if residuals
1386
+ else "Intervalle CV indisponible (fallback ±3%)."
1387
+ )
1388
+ )
1389
+ interval_df = gr.Dataframe(
1390
+ headers=INTERVAL_OUTPUT_COLUMNS,
1391
+ label="Plage empirique par bloc",
1392
+ )
1393
+ interval_chart = gr.Plot()
1394
+
1395
+ gr.Markdown("### Simulateur de transferts (points d'inscrits)")
1396
+ with gr.Row():
1397
+ target_bloc_dd = gr.Dropdown(choices=bloc_labels, value=bloc_labels[0] if bloc_labels else None, label="Bloc cible")
1398
+ with gr.Row():
1399
+ source_1_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["abstention"], label="Source 1")
1400
+ target_1_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["droite_dure"], label="Cible 1")
1401
+ delta_1 = gr.Slider(minimum=0, maximum=10, value=3, step=0.1, label="Delta 1 (points %)")
1402
+ with gr.Row():
1403
+ source_2_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["droite_modere"], label="Source 2")
1404
+ target_2_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["gauche_modere"], label="Cible 2")
1405
+ delta_2 = gr.Slider(minimum=0, maximum=10, value=3, step=0.1, label="Delta 2 (points %)")
1406
+ simulate_btn = gr.Button("Simuler les transferts")
1407
+ sim_df = gr.Dataframe(headers=SIM_OUTPUT_COLUMNS, label="Simulation par catégorie")
1408
+ sim_chart = gr.Plot()
1409
+ opportunity_df = gr.Dataframe(headers=OPPORTUNITY_OUTPUT_COLUMNS, label="Bureaux à potentiel (trié)")
1410
+
1411
+ def _predict(bv_label: str, target_label: str, inscrits_override: float | None):
1412
+ if not bv_label or not target_label:
1413
+ return pd.DataFrame(), "Entrée invalide", None
1414
+ code_bv = bureau_map.get(bv_label)
1415
+ if not code_bv:
1416
+ return pd.DataFrame(), "Bureau invalide", None
1417
+ try:
1418
+ parts = target_label.split()
1419
+ target_type, target_year = parts[0].lower(), int(parts[1])
1420
+ except Exception:
1421
+ target_type, target_year = "municipales", 2026
1422
+ df, backend_label, meta = backend.predict_bureau(code_bv, target_type, target_year, inscrits_override)
1423
+ plot = build_bar_chart(
1424
+ df,
1425
+ value_col="nombre",
1426
+ ylabel="Nombre d'électeurs",
1427
+ color_map=DISPLAY_LABEL_COLORS,
1428
+ )
1429
+ meta_label = f" | {meta}" if meta else ""
1430
+ return df, f"Source des données : {backend_label}{meta_label}", plot
1431
+
1432
+ def _parse_target_label(target_label: str) -> Tuple[str, int]:
1433
+ try:
1434
+ parts = target_label.split()
1435
+ return parts[0].lower(), int(parts[1])
1436
+ except Exception:
1437
+ return "municipales", 2026
1438
+
1439
+ def _map(target_label: str):
1440
+ if not target_label:
1441
+ return "<p>Élection invalide.</p>"
1442
+ target_type, target_year = _parse_target_label(target_label)
1443
+ return build_bureau_map_html(backend, target_type, target_year)
1444
+
1445
def _history(bv_label: str, election_label: str):
    """Look up past results for one bureau and one election.

    Returns (table, source markdown, plot | None, meta markdown). Invalid
    selections return an empty table and an error message.
    """
    def _invalid(message: str):
        # Shared shape for every rejection path: empty table, no plot, no meta.
        return pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS), message, None, ""

    if not bv_label or not election_label:
        return _invalid("Entrée invalide")
    code_bv = bureau_map.get(bv_label)
    if not code_bv:
        return _invalid("Bureau invalide")
    election_key = history_map.get(election_label)
    if not election_key:
        return _invalid("Élection invalide")
    try:
        election_type, election_year, round_num = parse_election_key(election_key)
    except Exception:
        return _invalid("Élection invalide")

    # Restrict the full history to the requested bureau/election/round.
    mask = (
        (backend.history["code_bv"] == code_bv)
        & (backend.history["election_type"] == election_type)
        & (backend.history["election_year"] == election_year)
        & (backend.history["round"] == round_num)
    )
    history_slice = backend.history[mask].copy()
    if history_slice.empty:
        return (
            pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS),
            f"Source des données : {backend_label}",
            None,
            "Aucun résultat pour ce bureau.",
        )

    table = prepare_history_table(history_slice)
    plot = build_bar_chart(
        table,
        value_col="score_%",
        ylabel="Score (%)",
        color_map=DISPLAY_LABEL_COLORS,
    )
    meta = format_history_meta(history_slice)
    return table, f"Source des données : {backend_label}", plot, meta
1480
+
1481
def _strategy_interval(
    bv_label: str,
    target_label: str,
    inscrits_override: float | None,
    band_label: str,
):
    """Compute CV-residual uncertainty intervals per bloc for one bureau.

    Returns (interval table, source markdown, plot | None); invalid inputs or
    missing backend details produce an empty table.
    """
    no_result = pd.DataFrame(columns=INTERVAL_OUTPUT_COLUMNS)
    if not bv_label or not target_label:
        return no_result, "Entrée invalide", None
    code_bv = bureau_map.get(bv_label)
    if not code_bv:
        return no_result, "Bureau invalide", None

    target_type, target_year = _parse_target_label(target_label)
    details, backend_label_local, _ = backend.predict_bureau_details(
        code_bv,
        target_type,
        target_year,
        inscrits_override,
    )
    if details is None:
        return no_result, backend_label_local, None

    exprimes_total = int(details["totals"].get("exprimes", 0))
    table = build_interval_table(
        details["shares_by_cat"],
        exprimes_total,
        residuals,
        band_label,
    )
    plot = build_interval_chart(table, color_map=DISPLAY_LABEL_COLORS)
    if residuals:
        source = f"Intervalle CV ({band_label}) basé sur le modèle : {residual_model}"
    else:
        source = "Intervalle CV indisponible (fallback ±3%)."
    return table, source, plot
1517
+
1518
def _strategy_simulate(
    bv_label: str,
    target_label: str,
    inscrits_override: float | None,
    bloc_cible_label: str,
    source_1: str,
    target_1: str,
    delta_1_val: float,
    source_2: str,
    target_2: str,
    delta_2_val: float,
):
    """Simulate up to two vote transfers and rank bureaux by potential gain.

    Returns (simulation table, plot | None, opportunity table). Invalid
    inputs or missing backend details yield empty tables and no plot.
    Each transfer is (source bloc, destination bloc, delta in points of
    registered voters); deltas <= 0 or unknown bloc labels are ignored.
    """
    empty_sim = pd.DataFrame(columns=SIM_OUTPUT_COLUMNS)
    empty_oppo = pd.DataFrame(columns=OPPORTUNITY_OUTPUT_COLUMNS)
    if not bv_label or not target_label:
        return empty_sim, None, empty_oppo
    code_bv = bureau_map.get(bv_label)
    if not code_bv:
        return empty_sim, None, empty_oppo
    target_type, target_year = _parse_target_label(target_label)
    details, _, _ = backend.predict_bureau_details(
        code_bv,
        target_type,
        target_year,
        inscrits_override,
    )
    if details is None:
        return empty_sim, None, empty_oppo

    # Collect the valid transfers: both bloc labels must resolve to keys and
    # the delta must be strictly positive.
    transfers = []
    for src_label, dst_label, delta in [
        (source_1, target_1, delta_1_val),
        (source_2, target_2, delta_2_val),
    ]:
        src_key = CATEGORY_LABEL_TO_KEY.get(src_label)
        dst_key = CATEGORY_LABEL_TO_KEY.get(dst_label)
        if src_key and dst_key and delta and delta > 0:
            transfers.append((src_key, dst_key, float(delta)))

    # Apply the transfers to the selected bureau and build the before/after view.
    counts = details["counts"]
    totals = details["totals"]
    inscrits_total = int(totals.get("inscrits", 0))
    updated = apply_transfers(counts, inscrits_total, transfers)
    sim_table = build_simulation_table(counts, updated)
    sim_plot = build_bar_chart(
        sim_table,
        value_col="apres_transfert",
        ylabel="Nombre d'électeurs",
        color_map=DISPLAY_LABEL_COLORS,
    )

    # Opportunity scan: replay the same transfers on every bureau and measure
    # the gain for the chosen target bloc. Only runs for a known bloc key.
    target_bloc = CATEGORY_LABEL_TO_KEY.get(bloc_cible_label, bloc_cible_label)
    opp_rows = []
    if target_bloc in ordered_categories():
        for bv_code in backend.available_bureaux():
            # The manual "inscrits" override only applies to the bureau the
            # user actually selected; the others use backend defaults.
            override = inscrits_override if bv_code == code_bv else None
            bv_details, _, _ = backend.predict_bureau_details(
                bv_code,
                target_type,
                target_year,
                override,
            )
            if bv_details is None:
                continue
            base_counts = bv_details["counts"]
            bv_totals = bv_details["totals"]
            bv_inscrits = int(bv_totals.get("inscrits", 0))
            updated_counts = apply_transfers(base_counts, bv_inscrits, transfers)
            bloc_counts = {cat: int(base_counts.get(cat, 0)) for cat in ordered_categories()}
            updated_blocs = {cat: int(updated_counts.get(cat, 0)) for cat in ordered_categories()}
            # Leading bloc before/after the transfers (first max wins on ties).
            top_base = max(bloc_counts, key=bloc_counts.get) if bloc_counts else None
            top_after = max(updated_blocs, key=updated_blocs.get) if updated_blocs else None
            gain = int(updated_counts.get(target_bloc, 0) - base_counts.get(target_bloc, 0))
            opp_rows.append(
                {
                    "bureau": bureau_label_by_code.get(bv_code, bv_code),
                    "gain_cible": gain,
                    "score_base": int(base_counts.get(target_bloc, 0)),
                    "score_apres": int(updated_counts.get(target_bloc, 0)),
                    "top_base": DISPLAY_CATEGORY_LABELS.get(top_base, top_base),
                    "top_apres": DISPLAY_CATEGORY_LABELS.get(top_after, top_after),
                    # "bascule" = the transfers flip this bureau to the target bloc.
                    "bascule": "oui" if top_base != target_bloc and top_after == target_bloc else "non",
                }
            )
    opp_df = pd.DataFrame(opp_rows, columns=OPPORTUNITY_OUTPUT_COLUMNS)
    if not opp_df.empty:
        # Flipped bureaux first ("oui" > "non" lexicographically), then by gain.
        opp_df = opp_df.sort_values(["bascule", "gain_cible"], ascending=[False, False])
    return sim_table, sim_plot, opp_df
1606
+
1607
+ predict_btn.click(_predict, inputs=[bureau_dd, target_dd, inscrits_in], outputs=[output_df, source_box, chart])
1608
+ history_btn.click(
1609
+ _history,
1610
+ inputs=[history_bureau_dd, history_election_dd],
1611
+ outputs=[history_df, history_source, history_chart, history_meta],
1612
+ )
1613
+ map_btn.click(
1614
+ _map,
1615
+ inputs=[map_target_dd],
1616
+ outputs=[map_html],
1617
+ )
1618
+ strategy_btn.click(
1619
+ _strategy_interval,
1620
+ inputs=[strategy_bureau_dd, strategy_target_dd, strategy_inscrits_in, interval_dd],
1621
+ outputs=[interval_df, interval_source, interval_chart],
1622
+ )
1623
+ simulate_btn.click(
1624
+ _strategy_simulate,
1625
+ inputs=[
1626
+ strategy_bureau_dd,
1627
+ strategy_target_dd,
1628
+ strategy_inscrits_in,
1629
+ target_bloc_dd,
1630
+ source_1_dd,
1631
+ target_1_dd,
1632
+ delta_1,
1633
+ source_2_dd,
1634
+ target_2_dd,
1635
+ delta_2,
1636
+ ],
1637
+ outputs=[sim_df, sim_chart, opportunity_df],
1638
+ )
1639
+ return demo
1640
+
1641
+
1642
if __name__ == "__main__":
    # Script entry point: configure basic console logging, build the Gradio
    # app and serve it. 0.0.0.0 exposes the server on all interfaces
    # (Docker / HF Spaces); 7860 is Gradio's conventional port.
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    demo = create_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860)
config/communes.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ communes:
2
+ # - code_insee: "34003"
3
+ # nom: "Agde"
4
+ # - code_insee: "34101"
5
+ # nom: "Florensac"
6
+ # - code_insee: "34199"
7
+ # nom: "Pezenas"
8
+ # - code_insee: "34300"
9
+ # nom: "Servian"
10
+ - code_insee: "34301"
11
+ nom: "Sete"
config/nuances.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Mapping des nuances politiques vers les blocs.
2
+ #
3
+ # - base_mapping: chemin vers le CSV historique (optionnel).
4
+ # - overrides: liste d'ajouts/surcharges pour des nuances absentes ou nouvelles.
5
+ # - mapping: mapping complet si vous ne voulez pas utiliser base_mapping.
6
+
7
+ base_mapping: data/mapping_candidats_blocs.csv
8
+
9
+ # Exemple d'ajout/surcharge :
10
+ # overrides:
11
+ # - code_candidature: "XYZ"
12
+ # nom_candidature: "Exemple de nuance"
13
+ # blocs: [gauche_modere, centre]
14
+ overrides: []
config/raw_sources.yaml ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 14_EU.csv:
2
+ code_bv_cols:
3
+ - Code de la commune
4
+ - N° de bureau de vote
5
+ date_scrutin: '2014-05-25'
6
+ rename_map:
7
+ Code nuance du candidat: code_candidature
8
+ Exprimés: exprimes
10
+ Inscrits: inscrits
11
+ Nom du candidat: nom_candidature
12
+ Nombre de voix du candidat: voix
13
+ Prénom du candidat: nom_candidature
14
+ Voix: voix
15
+ Votants: votants
16
+ tour_column: N° tour
17
+ type_scrutin: europeennes
18
+ 14_MN14_T1T2.csv:
19
+ code_bv_cols:
20
+ - Code commune
21
+ - N° de bureau de vote
22
+ date_scrutin: '2014-03-23'
23
+ rename_map:
24
+ Code nuance de la liste: code_candidature
25
+ Exprimés: exprimes
26
+ Inscrits: inscrits
27
+ Nom du candidat tête de liste: nom_candidature
28
+ Nombre de voix: voix
29
+ Prénom du candidat tête de liste: nom_candidature
30
+ Votants: votants
31
+ tour_column: N° tour
32
+ type_scrutin: municipales
33
+ 17_L_T1.csv:
34
+ code_bv_cols:
35
+ - Code de la commune
36
+ - Code du b.vote
37
+ date_scrutin: '2017-06-11'
38
+ rename_map:
39
+ Abstentions: abstentions
40
+ Blancs: blancs
41
+ Exprimés: exprimes
42
+ Inscrits: inscrits
43
+ Nom: nom_candidature
44
+ Nuance: code_candidature
45
+ Nuls: nuls
46
+ Voix: voix
47
+ Votants: votants
48
+ tour: 1
49
+ type_scrutin: legislatives
50
+ 17_L_T2.csv:
51
+ code_bv_cols:
52
+ - Code de la commune
53
+ - Code du b.vote
54
+ date_scrutin: '2017-06-18'
55
+ rename_map:
56
+ Abstentions: abstentions
57
+ Blancs: blancs
58
+ Exprimés: exprimes
59
+ Inscrits: inscrits
60
+ Nom: nom_candidature
61
+ Nuance: code_candidature
62
+ Nuls: nuls
63
+ Voix: voix
64
+ Votants: votants
65
+ tour: 2
66
+ type_scrutin: legislatives
67
+ 17_PR_T1.csv:
68
+ code_bv_cols:
69
+ - Code de la commune
70
+ - Code du b.vote
71
+ date_scrutin: '2017-04-23'
72
+ rename_map:
73
+ Abstentions: abstentions
74
+ Blancs: blancs
75
+ Code nuance du candidat: code_candidature
76
+ Exprimés: exprimes
77
+ Inscrits: inscrits
78
+ Nom: nom_candidature
79
+ Nuls: nuls
80
+ Voix: voix
81
+ Votants: votants
82
+ tour: 1
83
+ type_scrutin: presidentielles
84
+ 17_PR_T2.csv:
85
+ code_bv_cols:
86
+ - Code de la commune
87
+ - Code du b.vote
88
+ date_scrutin: '2017-05-07'
89
+ rename_map:
90
+ Abstentions: abstentions
91
+ Blancs: blancs
92
+ Code nuance du candidat: code_candidature
93
+ Exprimés: exprimes
94
+ Inscrits: inscrits
95
+ Nom: nom_candidature
96
+ Nuls: nuls
97
+ Voix: voix
98
+ Votants: votants
99
+ tour: 2
100
+ type_scrutin: presidentielles
101
+ 19_EU.csv:
102
+ code_bv_cols:
103
+ - Code de la commune
104
+ - Code du b.vote
105
+ date_scrutin: '2019-05-26'
106
+ rename_map:
107
+ Abstentions: abstentions
108
+ Blancs: blancs
109
+ Exprimés: exprimes
110
+ Inscrits: inscrits
111
+ Nom Tête de Liste: nom_candidature
112
+ Nuance Liste: code_candidature
113
+ Nuls: nuls
114
+ Voix: voix
115
+ Votants: votants
116
+ tour: 1
117
+ type_scrutin: europeennes
118
+ 20_MN_T1.csv:
119
+ code_bv_cols:
120
+ - Code de la commune
121
+ - Code B.Vote
122
+ date_scrutin: '2020-03-15'
123
+ rename_map:
124
+ Abstentions: abstentions
125
+ Blancs: blancs
126
+ Code Nuance: code_candidature
127
+ Exprimés: exprimes
128
+ Inscrits: inscrits
129
+ Liste: nom_candidature
130
+ Nom: nom_candidature
131
+ Nuls: nuls
132
+ Voix: voix
133
+ Votants: votants
134
+ sep: ;
135
+ tour: 1
136
+ type_scrutin: municipales
137
+ 20_MN_T2.csv:
138
+ code_bv_cols:
139
+ - Code de la commune
140
+ - Code B.Vote
141
+ date_scrutin: '2020-06-28'
142
+ rename_map:
143
+ Abstentions: abstentions
144
+ Blancs: blancs
145
+ Code Nuance: code_candidature
146
+ Exprimés: exprimes
147
+ Inscrits: inscrits
148
+ Liste: nom_candidature
149
+ Nom: nom_candidature
150
+ Nuls: nuls
151
+ Voix: voix
152
+ Votants: votants
153
+ tour: 2
154
+ type_scrutin: municipales
155
+ 21_DEP_T1.csv:
156
+ code_bv_cols:
157
+ - Code de la commune
158
+ - Code du b.vote
159
+ date_scrutin: '2021-06-20'
160
+ rename_map:
161
+ Abstentions: abstentions
162
+ Binôme: nom_candidature
163
+ Blancs: blancs
164
+ Exprimés: exprimes
165
+ Inscrits: inscrits
166
+ Nuance: code_candidature
167
+ Nuls: nuls
168
+ Voix: voix
169
+ Votants: votants
170
+ tour: 1
171
+ type_scrutin: departementales
172
+ 21_DEP_T2.csv:
173
+ code_bv_cols:
174
+ - Code de la commune
175
+ - Code du b.vote
176
+ date_scrutin: '2021-06-27'
177
+ rename_map:
178
+ Abstentions: abstentions
179
+ Binôme: nom_candidature
180
+ Blancs: blancs
181
+ Exprimés: exprimes
182
+ Inscrits: inscrits
183
+ Nuance: code_candidature
184
+ Nuls: nuls
185
+ Voix: voix
186
+ Votants: votants
187
+ tour: 2
188
+ type_scrutin: departementales
189
+ 21_REG_T1.csv:
190
+ code_bv_cols:
191
+ - Code de la commune
192
+ - Code du b.vote
193
+ date_scrutin: '2021-06-20'
194
+ rename_map:
195
+ Abstentions: abstentions
196
+ Blancs: blancs
197
+ Exprimés: exprimes
198
+ Inscrits: inscrits
199
+ Libellé Abrégé Liste: nom_candidature
200
+ Nuance Liste: code_candidature
201
+ Nuls: nuls
202
+ Voix: voix
203
+ Votants: votants
204
+ tour: 1
205
+ type_scrutin: regionales
206
+ 21_REG_T2.csv:
207
+ code_bv_cols:
208
+ - Code de la commune
209
+ - Code du b.vote
210
+ date_scrutin: '2021-06-27'
211
+ rename_map:
212
+ Abstentions: abstentions
213
+ Blancs: blancs
214
+ Exprimés: exprimes
215
+ Inscrits: inscrits
216
+ Libellé Abrégé Liste: nom_candidature
217
+ Nuance Liste: code_candidature
218
+ Nuls: nuls
219
+ Voix: voix
220
+ Votants: votants
221
+ tour: 2
222
+ type_scrutin: regionales
223
+ 22_L_T1.csv:
224
+ code_bv_cols:
225
+ - Code de la commune
226
+ - Code du b.vote
227
+ date_scrutin: '2022-06-12'
228
+ rename_map:
229
+ Abstentions: abstentions
230
+ Blancs: blancs
231
+ Exprimés: exprimes
232
+ Inscrits: inscrits
233
+ Nom: nom_candidature
234
+ Nuance: code_candidature
235
+ Nuls: nuls
236
+ Voix: voix
237
+ Votants: votants
238
+ tour: 1
239
+ type_scrutin: legislatives
240
+ 22_L_T2.csv:
241
+ code_bv_cols:
242
+ - Code de la commune
243
+ - Code du b.vote
244
+ date_scrutin: '2022-06-19'
245
+ rename_map:
246
+ Abstentions: abstentions
247
+ Blancs: blancs
248
+ Exprimés: exprimes
249
+ Inscrits: inscrits
250
+ Nom: nom_candidature
251
+ Nuance: code_candidature
252
+ Nuls: nuls
253
+ Voix: voix
254
+ Votants: votants
255
+ tour: 2
256
+ type_scrutin: legislatives
257
+ 22_PR_T1.csv:
258
+ code_bv_cols:
259
+ - Code de la commune
260
+ - Code du b.vote
261
+ date_scrutin: '2022-04-10'
262
+ rename_map:
263
+ Abstentions: abstentions
264
+ Blancs: blancs
265
+ Code nuance du candidat: code_candidature
266
+ Exprimés: exprimes
267
+ Inscrits: inscrits
268
+ Nom: nom_candidature
269
+ Nuls: nuls
270
+ Voix: voix
271
+ Votants: votants
272
+ tour: 1
273
+ type_scrutin: presidentielles
274
+ 22_PR_T2.csv:
275
+ code_bv_cols:
276
+ - Code de la commune
277
+ - Code du b.vote
278
+ date_scrutin: '2022-04-24'
279
+ rename_map:
280
+ Abstentions: abstentions
281
+ Blancs: blancs
282
+ Code nuance du candidat: code_candidature
283
+ Exprimés: exprimes
284
+ Inscrits: inscrits
285
+ Nom: nom_candidature
286
+ Nuls: nuls
287
+ Voix: voix
288
+ Votants: votants
289
+ tour: 2
290
+ type_scrutin: presidentielles
291
+ 24_EU.csv:
292
+ code_bv_cols:
293
+ - Code commune
294
+ - Code BV
295
+ date_scrutin: '2024-06-09'
296
+ rename_map:
297
+ Abstentions: abstentions
298
+ Blancs: blancs
299
+ Exprimés: exprimes
300
+ Inscrits: inscrits
301
+ Libellé abrégé de liste 1: nom_candidature
302
+ Nuance liste 1: code_candidature
303
+ Nuls: nuls
304
+ Voix: voix
305
+ Voix 1: voix
306
+ Votants: votants
307
+ tour: 1
308
+ type_scrutin: europeennes
309
+ 24_L_T1.csv:
310
+ code_bv_cols:
311
+ - Code commune
312
+ - Code BV
313
+ date_scrutin: '2024-06-30'
314
+ rename_map:
315
+ Abstentions: abstentions
316
+ Binôme: nom_candidature
317
+ Blancs: blancs
318
+ Exprimés: exprimes
319
+ Inscrits: inscrits
320
+ Libellé Abrégé Liste: nom_candidature
321
+ Nuance Liste: code_candidature
322
+ Nuls: nuls
323
+ Voix: voix
324
+ Votants: votants
325
+ tour: 1
326
+ type_scrutin: legislatives
327
+ 24_L_T2.csv:
328
+ code_bv_cols:
329
+ - Code commune
330
+ - Code BV
331
+ date_scrutin: '2024-07-07'
332
+ rename_map:
333
+ Abstentions: abstentions
334
+ Binôme: nom_candidature
335
+ Blancs: blancs
336
+ Exprimés: exprimes
337
+ Inscrits: inscrits
338
+ Libellé Abrégé Liste: nom_candidature
339
+ Nuance Liste: code_candidature
340
+ Nuls: nuls
341
+ Voix: voix
342
+ Votants: votants
343
+ tour: 2
344
+ type_scrutin: legislatives
data/geo/bdv_s_te.geojson ADDED
The diff for this file is too large to render. See raw diff
 
data/geo/bdv_s_te.kml ADDED
@@ -0,0 +1,1762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <kml xmlns="http://www.opengis.net/kml/2.2"><Document>
2
+ <Placemark id="wC4Df">
3
+ <name>Bureau 01</name><ExtendedData></ExtendedData>
4
+ <Polygon>
5
+ <outerBoundaryIs>
6
+ <LinearRing><coordinates>3.701684,43.397393
7
+ 3.697436,43.396753
8
+ 3.697017,43.395943
9
+ 3.697007,43.395428
10
+ 3.695612,43.395241
11
+ 3.695934,43.396504
12
+ 3.696427,43.396473
13
+ 3.696578,43.396831
14
+ 3.696213,43.397159
15
+ 3.696964,43.397128
16
+ 3.697157,43.39751
17
+ 3.697157,43.398079
18
+ 3.696975,43.398211
19
+ 3.696985,43.398858
20
+ 3.696771,43.399809
21
+ 3.697093,43.401649
22
+ 3.695955,43.40182
23
+ 3.694453,43.401555
24
+ 3.694775,43.401384
25
+ 3.694743,43.401189
26
+ 3.695129,43.401212
27
+ 3.695118,43.401064
28
+ 3.695505,43.40108
29
+ 3.69573,43.400955
30
+ 3.695419,43.400745
31
+ 3.69543,43.400441
32
+ 3.695033,43.400277
33
+ 3.69514,43.400129
34
+ 3.695312,43.39995
35
+ 3.695129,43.399833
36
+ 3.695397,43.399669
37
+ 3.69514,43.399279
38
+ 3.694839,43.399295
39
+ 3.695022,43.398437
40
+ 3.695033,43.397611
41
+ 3.69529,43.397564
42
+ 3.695322,43.397736
43
+ 3.695805,43.397704
44
+ 3.695065,43.396091
45
+ 3.69411,43.396247
46
+ 3.69293,43.394867
47
+ 3.695312,43.394914
48
+ 3.699818,43.395522
49
+ 3.700311,43.395319
50
+ 3.700504,43.394929
51
+ 3.701942,43.394134
52
+ 3.702006,43.394399
53
+ 3.700676,43.395132
54
+ 3.702328,43.395257
55
+ 3.702822,43.39574
56
+ 3.702672,43.397112
57
+ 3.701684,43.397393</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
58
+ <Placemark id="S9B2Q">
59
+ <name>Bureau 2</name><ExtendedData>
60
+ <Data name="_umap_options"><value>{"color":"MediumOrchid"}</value></Data></ExtendedData>
61
+ <Polygon>
62
+ <outerBoundaryIs>
63
+ <LinearRing><coordinates>3.694561,43.402506
64
+ 3.694389,43.402514
65
+ 3.694282,43.402569
66
+ 3.693911,43.402456
67
+ 3.694072,43.401622
68
+ 3.692425,43.401626
69
+ 3.691535,43.402896
70
+ 3.691181,43.40281
71
+ 3.691063,43.402974
72
+ 3.690355,43.402849
73
+ 3.690001,43.403325
74
+ 3.689657,43.403301
75
+ 3.689352,43.403804
76
+ 3.688483,43.403644
77
+ 3.688343,43.403906
78
+ 3.689228,43.404089
79
+ 3.689132,43.40426
80
+ 3.690258,43.404455
81
+ 3.690054,43.404615
82
+ 3.689856,43.404747
83
+ 3.690081,43.404779
84
+ 3.690382,43.40451
85
+ 3.69072,43.404701
86
+ 3.692297,43.404829
87
+ 3.692393,43.404915
88
+ 3.692688,43.404541
89
+ 3.693901,43.405055
90
+ 3.694416,43.405149
91
+ 3.694142,43.405036
92
+ 3.694287,43.403995
93
+ 3.694561,43.402506</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
94
+ <Placemark id="oC7F2">
95
+ <name>Bureau 3</name><ExtendedData>
96
+ <Data name="_umap_options"><value>{"color":"Chartreuse"}</value></Data></ExtendedData>
97
+ <Polygon>
98
+ <outerBoundaryIs>
99
+ <LinearRing><coordinates>3.69765,43.401778
100
+ 3.698037,43.401785
101
+ 3.697999,43.401937
102
+ 3.698251,43.401941
103
+ 3.698257,43.401817
104
+ 3.698745,43.401844
105
+ 3.698879,43.401922
106
+ 3.699678,43.401945
107
+ 3.699753,43.401867
108
+ 3.700097,43.401859
109
+ 3.700118,43.400347
110
+ 3.701695,43.400238
111
+ 3.701996,43.399685
112
+ 3.701878,43.399575
113
+ 3.701695,43.39963
114
+ 3.699024,43.397494
115
+ 3.697704,43.397619
116
+ 3.69765,43.401778</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
117
+ <Placemark id="iUben">
118
+ <name>Bureau 5</name><ExtendedData>
119
+ <Data name="_umap_options"><value>{"color":"LightSkyBlue"}</value></Data></ExtendedData>
120
+ <Polygon>
121
+ <outerBoundaryIs>
122
+ <LinearRing><coordinates>3.696084,43.406825
123
+ 3.69338,43.40656
124
+ 3.69352,43.405079
125
+ 3.694432,43.405149
126
+ 3.694142,43.40504
127
+ 3.694282,43.404073
128
+ 3.694571,43.402522
129
+ 3.694421,43.402514
130
+ 3.694271,43.402577
131
+ 3.693917,43.40246
132
+ 3.694067,43.401637
133
+ 3.694464,43.401583
134
+ 3.694689,43.40168
135
+ 3.695419,43.401727
136
+ 3.695698,43.401668
137
+ 3.695891,43.40182
138
+ 3.696631,43.401828
139
+ 3.696685,43.401672
140
+ 3.697082,43.40168
141
+ 3.697168,43.402179
142
+ 3.696524,43.405313
143
+ 3.696084,43.406825</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
144
+ <Placemark id="rI3Wb">
145
+ <name>Bureau 6</name><ExtendedData></ExtendedData>
146
+ <Polygon>
147
+ <outerBoundaryIs>
148
+ <LinearRing><coordinates>3.690253,43.404459
149
+ 3.689126,43.40426
150
+ 3.689228,43.404081
151
+ 3.688332,43.403909
152
+ 3.687989,43.404354
153
+ 3.689298,43.405059
154
+ 3.689309,43.405164
155
+ 3.689239,43.405671
156
+ 3.68866,43.405605
157
+ 3.688102,43.406045
158
+ 3.693359,43.406552
159
+ 3.693531,43.405063
160
+ 3.69441,43.405157
161
+ 3.693895,43.405055
162
+ 3.692704,43.404549
163
+ 3.692372,43.404915
164
+ 3.692297,43.404829
165
+ 3.690687,43.404712
166
+ 3.690408,43.404517
167
+ 3.690076,43.404782
168
+ 3.689851,43.404751
169
+ 3.690253,43.404459</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
170
+ <Placemark id="AaY2i">
171
+ <name>Bureau 7</name><ExtendedData>
172
+ <Data name="_umap_options"><value>{"color":"Chartreuse"}</value></Data></ExtendedData>
173
+ <Polygon>
174
+ <outerBoundaryIs>
175
+ <LinearRing><coordinates>3.695998,43.408056
176
+ 3.69514,43.409366
177
+ 3.694577,43.410207
178
+ 3.694679,43.410324
179
+ 3.694968,43.410262
180
+ 3.694936,43.41034
181
+ 3.696814,43.410659
182
+ 3.69705,43.410846
183
+ 3.69735,43.410901
184
+ 3.69765,43.410823
185
+ 3.697758,43.410496
186
+ 3.697565,43.410262
187
+ 3.697629,43.410083
188
+ 3.697876,43.410083
189
+ 3.697715,43.409864
190
+ 3.697876,43.409405
191
+ 3.698101,43.409444
192
+ 3.698133,43.409311
193
+ 3.697972,43.409272
194
+ 3.698047,43.409085
195
+ 3.698262,43.409023
196
+ 3.698326,43.408898
197
+ 3.698616,43.408945
198
+ 3.698659,43.408867
199
+ 3.698809,43.408882
200
+ 3.699163,43.408906
201
+ 3.700461,43.407573
202
+ 3.696985,43.405967
203
+ 3.695998,43.408056</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
204
+ <Placemark id="aZn3x">
205
+ <name>Bureau 8</name><ExtendedData></ExtendedData>
206
+ <Polygon>
207
+ <outerBoundaryIs>
208
+ <LinearRing><coordinates>3.685505,43.400745
209
+ 3.683478,43.402506
210
+ 3.683617,43.402978
211
+ 3.682657,43.403387
212
+ 3.682415,43.402994
213
+ 3.68226,43.403029
214
+ 3.682512,43.403434
215
+ 3.6816,43.403754
216
+ 3.67954,43.404178
217
+ 3.67924,43.403952
218
+ 3.679261,43.404323
219
+ 3.679036,43.404393
220
+ 3.67895,43.40428
221
+ 3.678735,43.404369
222
+ 3.678371,43.404159
223
+ 3.67468,43.404291
224
+ 3.674455,43.403941
225
+ 3.674412,43.404307
226
+ 3.673221,43.404299
227
+ 3.673344,43.403929
228
+ 3.67262,43.403793
229
+ 3.672566,43.403964
230
+ 3.673135,43.404065
231
+ 3.673028,43.404354
232
+ 3.670903,43.404058
233
+ 3.670158,43.40403
234
+ 3.67019,43.403652
235
+ 3.670624,43.403625
236
+ 3.67122,43.403711
237
+ 3.671177,43.402997
238
+ 3.671982,43.403052
239
+ 3.672041,43.402464
240
+ 3.670802,43.402389
241
+ 3.67085,43.402085
242
+ 3.669734,43.401992
243
+ 3.669605,43.400675
244
+ 3.669884,43.400604
245
+ 3.669949,43.400199
246
+ 3.669648,43.400168
247
+ 3.669595,43.399895
248
+ 3.669412,43.399778
249
+ 3.669434,43.398866
250
+ 3.671139,43.398889
251
+ 3.671805,43.399022
252
+ 3.672631,43.398788
253
+ 3.672684,43.399178
254
+ 3.673489,43.399076
255
+ 3.673553,43.398897
256
+ 3.674058,43.399193
257
+ 3.674326,43.3991
258
+ 3.674841,43.398593
259
+ 3.67424,43.397502
260
+ 3.67969,43.396722
261
+ 3.67939,43.395849
262
+ 3.68042,43.395849
263
+ 3.680334,43.395553
264
+ 3.680634,43.395038
265
+ 3.681214,43.395272
266
+ 3.681922,43.39507
267
+ 3.682179,43.39549
268
+ 3.684711,43.395179
269
+ 3.687222,43.394602
270
+ 3.686106,43.39507
271
+ 3.68602,43.395865
272
+ 3.686557,43.396582
273
+ 3.687694,43.396722
274
+ 3.684068,43.398858
275
+ 3.683789,43.399373
276
+ 3.683295,43.399326
277
+ 3.682973,43.398593
278
+ 3.682888,43.399232
279
+ 3.680592,43.39903
280
+ 3.680506,43.399731
281
+ 3.680849,43.400261
282
+ 3.679454,43.400932
283
+ 3.679519,43.401399
284
+ 3.682652,43.400994
285
+ 3.685355,43.400355
286
+ 3.686407,43.400542
287
+ 3.68602,43.400916
288
+ 3.685505,43.400745</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
289
+ <Placemark id="Lvrgc">
290
+ <name>Bureau 9</name><ExtendedData></ExtendedData>
291
+ <Polygon>
292
+ <outerBoundaryIs>
293
+ <LinearRing><coordinates>3.676193,43.394968
294
+ 3.676021,43.394929
295
+ 3.675334,43.395132
296
+ 3.674229,43.395452
297
+ 3.674304,43.395701
298
+ 3.6738,43.395631
299
+ 3.674004,43.395888
300
+ 3.672953,43.396738
301
+ 3.672684,43.396161
302
+ 3.671644,43.396473
303
+ 3.671494,43.395997
304
+ 3.671311,43.396091
305
+ 3.671064,43.395834
306
+ 3.670957,43.395841
307
+ 3.671536,43.396496
308
+ 3.670045,43.397034
309
+ 3.669294,43.396496
310
+ 3.669595,43.396356
311
+ 3.669455,43.396138
312
+ 3.668704,43.396262
313
+ 3.669133,43.397284
314
+ 3.668039,43.397517
315
+ 3.668189,43.397782
316
+ 3.667835,43.397814
317
+ 3.667309,43.396839
318
+ 3.667084,43.396964
319
+ 3.66688,43.396785
320
+ 3.665882,43.397268
321
+ 3.665518,43.397439
322
+ 3.665088,43.397018
323
+ 3.66408,43.397533
324
+ 3.663715,43.397065
325
+ 3.663501,43.397455
326
+ 3.66408,43.398196
327
+ 3.662975,43.398889
328
+ 3.661838,43.397829
329
+ 3.661623,43.397892
330
+ 3.661087,43.397829
331
+ 3.661344,43.397252
332
+ 3.660979,43.3968
333
+ 3.659177,43.395927
334
+ 3.659177,43.395771
335
+ 3.658276,43.395319
336
+ 3.658426,43.394882
337
+ 3.659134,43.395163
338
+ 3.660293,43.393962
339
+ 3.659456,43.393651
340
+ 3.659756,43.39337
341
+ 3.659155,43.393074
342
+ 3.659306,43.392653
343
+ 3.660915,43.393292
344
+ 3.662825,43.392559
345
+ 3.663275,43.392606
346
+ 3.664262,43.39178
347
+ 3.664606,43.392622
348
+ 3.664241,43.393011
349
+ 3.665164,43.393292
350
+ 3.666129,43.393027
351
+ 3.666472,43.392762
352
+ 3.668532,43.39284
353
+ 3.669326,43.393697
354
+ 3.670056,43.393994
355
+ 3.671853,43.393452
356
+ 3.672304,43.393204
357
+ 3.672692,43.392871
358
+ 3.672816,43.392707
359
+ 3.672907,43.392353
360
+ 3.672888,43.392261
361
+ 3.67277,43.392039
362
+ 3.672532,43.391752
363
+ 3.671815,43.391998
364
+ 3.671687,43.391889
365
+ 3.673041,43.391458
366
+ 3.673334,43.391322
367
+ 3.673449,43.391191
368
+ 3.673315,43.391054
369
+ 3.673406,43.390916
370
+ 3.673497,43.390899
371
+ 3.673462,43.39083
372
+ 3.673435,43.390758
373
+ 3.673468,43.390708
374
+ 3.673588,43.39069
375
+ 3.673567,43.390649
376
+ 3.673709,43.390614
377
+ 3.6738,43.390743
378
+ 3.673972,43.390702
379
+ 3.67409,43.39069
380
+ 3.67409,43.390836
381
+ 3.674176,43.390797
382
+ 3.674444,43.390821
383
+ 3.6745,43.390863
384
+ 3.674503,43.390978
385
+ 3.674567,43.390965
386
+ 3.674543,43.390733
387
+ 3.674626,43.390719
388
+ 3.674669,43.391678
389
+ 3.67542,43.392146
390
+ 3.674766,43.392629
391
+ 3.675013,43.392793
392
+ 3.676311,43.393619
393
+ 3.67718,43.394586
394
+ 3.676193,43.394968</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
395
+ <Placemark id="ouLu8">
396
+ <name>Bureau 10</name><ExtendedData>
397
+ <Data name="_umap_options"><value>{"color":"DodgerBlue"}</value></Data></ExtendedData>
398
+ <Polygon>
399
+ <outerBoundaryIs>
400
+ <LinearRing><coordinates>3.699142,43.406349
401
+ 3.699667,43.405687
402
+ 3.699721,43.404588
403
+ 3.698401,43.404401
404
+ 3.698766,43.404237
405
+ 3.698841,43.404237
406
+ 3.698906,43.404104
407
+ 3.699389,43.40412
408
+ 3.699346,43.403691
409
+ 3.699238,43.403652
410
+ 3.699249,43.403598
411
+ 3.699378,43.40359
412
+ 3.699378,43.403504
413
+ 3.69912,43.403465
414
+ 3.699249,43.403247
415
+ 3.699431,43.403177
416
+ 3.699528,43.403192
417
+ 3.699517,43.403372
418
+ 3.699667,43.403403
419
+ 3.699678,43.403356
420
+ 3.699979,43.403496
421
+ 3.699946,43.403637
422
+ 3.699957,43.403754
423
+ 3.699818,43.403878
424
+ 3.699946,43.403909
425
+ 3.699946,43.403987
426
+ 3.700075,43.404081
427
+ 3.700161,43.404026
428
+ 3.700966,43.404447
429
+ 3.701234,43.40451
430
+ 3.702028,43.404564
431
+ 3.702135,43.402678
432
+ 3.70235,43.402709
433
+ 3.704849,43.404619
434
+ 3.705676,43.404112
435
+ 3.702757,43.401462
436
+ 3.702307,43.400869
437
+ 3.703766,43.398468
438
+ 3.706942,43.397876
439
+ 3.708701,43.398905
440
+ 3.707199,43.401867
441
+ 3.7081,43.402304
442
+ 3.711233,43.39825
443
+ 3.714023,43.398344
444
+ 3.715096,43.398811
445
+ 3.716512,43.400776
446
+ 3.723121,43.406482
447
+ 3.725696,43.407168
448
+ 3.725395,43.413185
449
+ 3.726854,43.413808
450
+ 3.724966,43.413933
451
+ 3.725309,43.416115
452
+ 3.724751,43.418016
453
+ 3.707671,43.410441
454
+ 3.699142,43.406349</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
455
+ <Placemark id="043I7">
456
+ <name>Bureau 11</name><ExtendedData></ExtendedData>
457
+ <Polygon>
458
+ <outerBoundaryIs>
459
+ <LinearRing><coordinates>3.695483,43.401092
460
+ 3.695124,43.401068
461
+ 3.695092,43.401205
462
+ 3.694743,43.401197
463
+ 3.694764,43.401392
464
+ 3.694443,43.401555
465
+ 3.694099,43.401629
466
+ 3.692431,43.401618
467
+ 3.691556,43.402896
468
+ 3.691245,43.402787
469
+ 3.690977,43.402974
470
+ 3.690248,43.402865
471
+ 3.689969,43.403364
472
+ 3.689625,43.403255
473
+ 3.689303,43.403707
474
+ 3.688467,43.40366
475
+ 3.688338,43.403863
476
+ 3.687544,43.403551
477
+ 3.688037,43.402943
478
+ 3.687673,43.402896
479
+ 3.686063,43.403551
480
+ 3.685784,43.403925
481
+ 3.684626,43.403644
482
+ 3.685699,43.402428
483
+ 3.684497,43.403551
484
+ 3.683767,43.403738
485
+ 3.683488,43.402475
486
+ 3.685548,43.40076
487
+ 3.685956,43.400947
488
+ 3.686407,43.400526
489
+ 3.685398,43.400371
490
+ 3.684475,43.400573
491
+ 3.682737,43.400963
492
+ 3.679605,43.401337
493
+ 3.679454,43.400994
494
+ 3.680892,43.400261
495
+ 3.68057,43.3997
496
+ 3.68057,43.399045
497
+ 3.68293,43.399232
498
+ 3.682952,43.398562
499
+ 3.683274,43.399342
500
+ 3.683875,43.399373
501
+ 3.684111,43.398858
502
+ 3.686643,43.397471
503
+ 3.687651,43.396722
504
+ 3.687201,43.396644
505
+ 3.686578,43.396551
506
+ 3.685999,43.395865
507
+ 3.686106,43.395116
508
+ 3.687136,43.394695
509
+ 3.687737,43.394539
510
+ 3.687823,43.393541
511
+ 3.688311,43.393113
512
+ 3.689121,43.393323
513
+ 3.689057,43.394446
514
+ 3.689362,43.394485
515
+ 3.689867,43.394173
516
+ 3.6897,43.394064
517
+ 3.68992,43.393284
518
+ 3.690779,43.393514
519
+ 3.690918,43.393354
520
+ 3.691503,43.393666
521
+ 3.691396,43.393791
522
+ 3.692029,43.393955
523
+ 3.692586,43.394765
524
+ 3.69323,43.395467
525
+ 3.69337,43.395381
526
+ 3.694115,43.396239
527
+ 3.695049,43.396083
528
+ 3.6958,43.397689
529
+ 3.695328,43.397736
530
+ 3.695285,43.397556
531
+ 3.695033,43.397615
532
+ 3.695022,43.398461
533
+ 3.694839,43.399318
534
+ 3.695129,43.399279
535
+ 3.695354,43.399685
536
+ 3.695124,43.399829
537
+ 3.695301,43.399957
538
+ 3.695033,43.400269
539
+ 3.69543,43.400448
540
+ 3.69543,43.40076
541
+ 3.695703,43.400963
542
+ 3.695483,43.401092</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
543
+ <Placemark id="YpoWc">
544
+ <name>Bureau 12</name><ExtendedData>
545
+ <Data name="_umap_options"><value>{"color":"Orchid"}</value></Data></ExtendedData>
546
+ <Polygon>
547
+ <outerBoundaryIs>
548
+ <LinearRing><coordinates>3.698788,43.417642
549
+ 3.694496,43.415242
550
+ 3.696127,43.413964
551
+ 3.694046,43.41352
552
+ 3.69308,43.41327
553
+ 3.69279,43.412849
554
+ 3.692715,43.412164
555
+ 3.693123,43.411735
556
+ 3.694153,43.411501
557
+ 3.694679,43.410348
558
+ 3.694957,43.41027
559
+ 3.694968,43.410355
560
+ 3.696781,43.410636
561
+ 3.697039,43.410823
562
+ 3.697318,43.410917
563
+ 3.697661,43.410815
564
+ 3.697758,43.410488
565
+ 3.697565,43.410277
566
+ 3.697608,43.410106
567
+ 3.697886,43.410075
568
+ 3.697715,43.409872
569
+ 3.697876,43.409428
570
+ 3.69808,43.409451
571
+ 3.698123,43.409334
572
+ 3.697994,43.409264
573
+ 3.698037,43.409089
574
+ 3.698246,43.409027
575
+ 3.698326,43.40891
576
+ 3.698595,43.408945
577
+ 3.698654,43.408871
578
+ 3.699169,43.408906
579
+ 3.700461,43.407569
580
+ 3.723378,43.417892
581
+ 3.722091,43.4221
582
+ 3.708315,43.423004
583
+ 3.706555,43.423066
584
+ 3.706555,43.422037
585
+ 3.705482,43.42185
586
+ 3.703079,43.422006
587
+ 3.698788,43.417642</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
588
+ <Placemark id="6iuwV">
589
+ <name>Bureau 13</name><ExtendedData></ExtendedData>
590
+ <Polygon>
591
+ <outerBoundaryIs>
592
+ <LinearRing><coordinates>3.699099,43.406365
593
+ 3.699657,43.405687
594
+ 3.69971,43.404564
595
+ 3.698391,43.404401
596
+ 3.698777,43.404221
597
+ 3.698852,43.404221
598
+ 3.698895,43.404081
599
+ 3.699389,43.40412
600
+ 3.699356,43.403691
601
+ 3.699238,43.40366
602
+ 3.699249,43.403598
603
+ 3.699356,43.403598
604
+ 3.699378,43.403496
605
+ 3.69911,43.403457
606
+ 3.699238,43.403239
607
+ 3.699442,43.403169
608
+ 3.699528,43.4032
609
+ 3.699528,43.403372
610
+ 3.699678,43.403395
611
+ 3.699678,43.403325
612
+ 3.699968,43.403473
613
+ 3.699946,43.403629
614
+ 3.699989,43.403738
615
+ 3.699839,43.403863
616
+ 3.699968,43.403878
617
+ 3.699946,43.403964
618
+ 3.700086,43.404065
619
+ 3.700172,43.404011
620
+ 3.700279,43.404065
621
+ 3.700998,43.40444
622
+ 3.701245,43.404502
623
+ 3.701341,43.402608
624
+ 3.700118,43.402538
625
+ 3.700129,43.401875
626
+ 3.699764,43.401883
627
+ 3.699678,43.401961
628
+ 3.698895,43.401945
629
+ 3.698756,43.401859
630
+ 3.698294,43.401836
631
+ 3.69823,43.401953
632
+ 3.698015,43.401937
633
+ 3.698026,43.40182
634
+ 3.697715,43.401797
635
+ 3.697017,43.405414
636
+ 3.699099,43.406365</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
637
+ <Placemark id="MY3Lr">
638
+ <name>Bureau 14</name><ExtendedData></ExtendedData>
639
+ <Polygon>
640
+ <outerBoundaryIs>
641
+ <LinearRing><coordinates>3.676322,43.393604
642
+ 3.674841,43.392653
643
+ 3.675442,43.392169
644
+ 3.674734,43.391639
645
+ 3.674669,43.390797
646
+ 3.675592,43.390782
647
+ 3.677952,43.391156
648
+ 3.677952,43.391795
649
+ 3.68012,43.392793
650
+ 3.681772,43.393214
651
+ 3.682652,43.393386
652
+ 3.683553,43.393339
653
+ 3.684046,43.392809
654
+ 3.684347,43.392193
655
+ 3.684411,43.39238
656
+ 3.685001,43.392372
657
+ 3.685591,43.392473
658
+ 3.685763,43.392325
659
+ 3.686117,43.392216
660
+ 3.686718,43.392341
661
+ 3.688048,43.392317
662
+ 3.688509,43.392169
663
+ 3.688499,43.392271
664
+ 3.690162,43.392224
665
+ 3.690097,43.392076
666
+ 3.691063,43.392045
667
+ 3.6919,43.392068
668
+ 3.691953,43.392403
669
+ 3.692222,43.392505
670
+ 3.692082,43.392575
671
+ 3.692254,43.393019
672
+ 3.692619,43.393269
673
+ 3.692898,43.393245
674
+ 3.692887,43.393362
675
+ 3.693606,43.393861
676
+ 3.693713,43.394165
677
+ 3.694046,43.394196
678
+ 3.694153,43.394344
679
+ 3.694013,43.394555
680
+ 3.694668,43.394695
681
+ 3.695322,43.394914
682
+ 3.692908,43.394851
683
+ 3.693367,43.395387
684
+ 3.693222,43.395457
685
+ 3.692594,43.394785
686
+ 3.692436,43.394526
687
+ 3.692055,43.393945
688
+ 3.691353,43.393791
689
+ 3.691535,43.393682
690
+ 3.690902,43.393347
691
+ 3.690784,43.393503
692
+ 3.689915,43.393276
693
+ 3.6897,43.394064
694
+ 3.689861,43.394165
695
+ 3.689346,43.394477
696
+ 3.689046,43.394446
697
+ 3.689132,43.393323
698
+ 3.688306,43.393105
699
+ 3.687769,43.393565
700
+ 3.687737,43.394485
701
+ 3.685226,43.395046
702
+ 3.682158,43.395522
703
+ 3.681997,43.395085
704
+ 3.681235,43.39528
705
+ 3.680506,43.395093
706
+ 3.680334,43.395506
707
+ 3.680559,43.395724
708
+ 3.68042,43.39588
709
+ 3.67969,43.395763
710
+ 3.679562,43.395607
711
+ 3.679358,43.395678
712
+ 3.679723,43.396605
713
+ 3.679519,43.396847
714
+ 3.677899,43.396941
715
+ 3.677019,43.397284
716
+ 3.676622,43.396379
717
+ 3.675978,43.396535
718
+ 3.675785,43.396161
719
+ 3.676043,43.396005
720
+ 3.675785,43.395709
721
+ 3.676064,43.395506
722
+ 3.675742,43.395023
723
+ 3.676005,43.394933
724
+ 3.676236,43.39496
725
+ 3.677201,43.394598
726
+ 3.676322,43.393604</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
727
+ <Placemark id="DZ7yf">
728
+ <name>Bureau 15</name><ExtendedData></ExtendedData>
729
+ <Polygon>
730
+ <outerBoundaryIs>
731
+ <LinearRing><coordinates>3.668125,43.401805
732
+ 3.667738,43.401384
733
+ 3.667331,43.401322
734
+ 3.667052,43.401072
735
+ 3.666987,43.400495
736
+ 3.666344,43.400417
737
+ 3.665721,43.400137
738
+ 3.664992,43.400308
739
+ 3.664412,43.3997
740
+ 3.664927,43.39931
741
+ 3.665614,43.39903
742
+ 3.667674,43.398656
743
+ 3.667159,43.398281
744
+ 3.666108,43.398656
745
+ 3.665915,43.398422
746
+ 3.665507,43.398593
747
+ 3.665539,43.398827
748
+ 3.6654,43.398663
749
+ 3.665045,43.398718
750
+ 3.665067,43.398858
751
+ 3.664541,43.399084
752
+ 3.66438,43.399014
753
+ 3.663576,43.399373
754
+ 3.662996,43.398921
755
+ 3.664058,43.398196
756
+ 3.663522,43.397494
757
+ 3.663661,43.397081
758
+ 3.664091,43.397502
759
+ 3.665164,43.397018
760
+ 3.66555,43.397439
761
+ 3.666859,43.3968
762
+ 3.667095,43.396956
763
+ 3.667266,43.396878
764
+ 3.667824,43.397829
765
+ 3.668168,43.397767
766
+ 3.668039,43.39751
767
+ 3.669133,43.397291
768
+ 3.668693,43.39627
769
+ 3.669452,43.396143
770
+ 3.669586,43.39635
771
+ 3.669291,43.396496
772
+ 3.670034,43.397034
773
+ 3.671542,43.396496
774
+ 3.670962,43.395834
775
+ 3.671097,43.395849
776
+ 3.671311,43.396091
777
+ 3.671515,43.395989
778
+ 3.671644,43.396473
779
+ 3.672695,43.396165
780
+ 3.672953,43.396722
781
+ 3.673983,43.39588
782
+ 3.6738,43.395631
783
+ 3.674326,43.39567
784
+ 3.674262,43.395436
785
+ 3.675807,43.395007
786
+ 3.676085,43.395584
787
+ 3.675731,43.395709
788
+ 3.675989,43.396075
789
+ 3.675753,43.396153
790
+ 3.675989,43.396535
791
+ 3.676579,43.396325
792
+ 3.676922,43.397057
793
+ 3.675731,43.397416
794
+ 3.674326,43.397455
795
+ 3.674819,43.398562
796
+ 3.674304,43.399092
797
+ 3.674058,43.399162
798
+ 3.673586,43.398882
799
+ 3.673478,43.399076
800
+ 3.672706,43.399186
801
+ 3.672652,43.398772
802
+ 3.672051,43.398936
803
+ 3.671751,43.399014
804
+ 3.671182,43.398913
805
+ 3.669423,43.398866
806
+ 3.669391,43.399357
807
+ 3.669423,43.399794
808
+ 3.669584,43.399918
809
+ 3.669637,43.400176
810
+ 3.669895,43.400183
811
+ 3.669863,43.400581
812
+ 3.669605,43.400659
813
+ 3.66967,43.400979
814
+ 3.669723,43.401953
815
+ 3.670871,43.402046
816
+ 3.670785,43.402389
817
+ 3.67203,43.402467
818
+ 3.671987,43.403036
819
+ 3.671172,43.40299
820
+ 3.671225,43.403715
821
+ 3.670571,43.403613
822
+ 3.668125,43.401805</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
823
+ <Placemark id="xGVYD">
824
+ <name>Bureau 16</name><ExtendedData>
825
+ <Data name="_umap_options"><value>{"color":"Chartreuse"}</value></Data></ExtendedData>
826
+ <Polygon>
827
+ <outerBoundaryIs>
828
+ <LinearRing><coordinates>3.693091,43.406544
829
+ 3.688059,43.406037
830
+ 3.688617,43.405609
831
+ 3.689175,43.405655
832
+ 3.68926,43.405063
833
+ 3.687952,43.404362
834
+ 3.688316,43.403878
835
+ 3.687458,43.403535
836
+ 3.686299,43.403785
837
+ 3.686128,43.404128
838
+ 3.686299,43.404362
839
+ 3.685634,43.404845
840
+ 3.685913,43.405297
841
+ 3.685441,43.405422
842
+ 3.685098,43.405843
843
+ 3.685784,43.406435
844
+ 3.68778,43.406201
845
+ 3.688016,43.406326
846
+ 3.687705,43.406451
847
+ 3.687812,43.40677
848
+ 3.688606,43.406864
849
+ 3.688692,43.406778
850
+ 3.689089,43.406825
851
+ 3.689142,43.406716
852
+ 3.689325,43.406856
853
+ 3.689314,43.406957
854
+ 3.689893,43.406988
855
+ 3.689904,43.406926
856
+ 3.690033,43.406949
857
+ 3.690076,43.406786
858
+ 3.691031,43.406856
859
+ 3.691245,43.407027
860
+ 3.691245,43.407136
861
+ 3.692694,43.407285
862
+ 3.693112,43.407456
863
+ 3.693144,43.407565
864
+ 3.693262,43.407573
865
+ 3.693466,43.408033
866
+ 3.69426,43.408742
867
+ 3.694646,43.40843
868
+ 3.694904,43.408664
869
+ 3.696041,43.406887
870
+ 3.693091,43.406544</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
871
+ <Placemark id="gFNtY">
872
+ <name>Bureau 17</name><ExtendedData></ExtendedData>
873
+ <Polygon>
874
+ <outerBoundaryIs>
875
+ <LinearRing><coordinates>3.691058,43.407949
876
+ 3.691127,43.407534
877
+ 3.691015,43.407394
878
+ 3.691224,43.40716
879
+ 3.692683,43.407292
880
+ 3.693112,43.407479
881
+ 3.693112,43.407573
882
+ 3.693241,43.407588
883
+ 3.693434,43.408033
884
+ 3.69426,43.408742
885
+ 3.694646,43.408446
886
+ 3.694893,43.408649
887
+ 3.69441,43.409475
888
+ 3.693627,43.410371
889
+ 3.692554,43.409467
890
+ 3.69175,43.409864
891
+ 3.69117,43.409381
892
+ 3.691084,43.409303
893
+ 3.690988,43.409233
894
+ 3.690923,43.409186
895
+ 3.690966,43.408746
896
+ 3.690816,43.408461
897
+ 3.69091,43.408017
898
+ 3.691058,43.407949</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
899
+ <Placemark id="8BP79">
900
+ <name>Bureau 18</name><ExtendedData></ExtendedData>
901
+ <Polygon>
902
+ <outerBoundaryIs>
903
+ <LinearRing><coordinates>3.670152,43.404026
904
+ 3.670893,43.404089
905
+ 3.670882,43.404245
906
+ 3.671955,43.404323
907
+ 3.671805,43.404954
908
+ 3.672878,43.405281
909
+ 3.672727,43.405624
910
+ 3.672566,43.405882
911
+ 3.672116,43.405819
912
+ 3.671708,43.406661
913
+ 3.670732,43.408095
914
+ 3.66864,43.407386
915
+ 3.668082,43.408235
916
+ 3.667567,43.408539
917
+ 3.667073,43.408204
918
+ 3.665936,43.408906
919
+ 3.665636,43.408641
920
+ 3.663962,43.409693
921
+ 3.66246,43.407807
922
+ 3.664391,43.406677
923
+ 3.664262,43.406037
924
+ 3.664445,43.405928
925
+ 3.66496,43.406248
926
+ 3.665142,43.405967
927
+ 3.664638,43.405648
928
+ 3.664353,43.405894
929
+ 3.664261,43.405819
930
+ 3.664553,43.405594
931
+ 3.664362,43.405496
932
+ 3.664099,43.4057
933
+ 3.663975,43.405595
934
+ 3.664235,43.405398
935
+ 3.663028,43.404151
936
+ 3.66276,43.404026
937
+ 3.662513,43.403933
938
+ 3.662417,43.403629
939
+ 3.663093,43.403528
940
+ 3.663104,43.403177
941
+ 3.663865,43.40313
942
+ 3.663983,43.403294
943
+ 3.667288,43.403489
944
+ 3.670174,43.403668
945
+ 3.670152,43.404026</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
946
+ <Placemark id="jva7z">
947
+ <name>Bureau 19</name><ExtendedData></ExtendedData>
948
+ <Polygon>
949
+ <outerBoundaryIs>
950
+ <LinearRing><coordinates>3.670721,43.408126
951
+ 3.67173,43.406638
952
+ 3.672116,43.405796
953
+ 3.672577,43.405897
954
+ 3.673317,43.406279
955
+ 3.673156,43.406677
956
+ 3.675624,43.407331
957
+ 3.675742,43.406934
958
+ 3.676043,43.406903
959
+ 3.676,43.406606
960
+ 3.676375,43.406536
961
+ 3.676375,43.406201
962
+ 3.677684,43.40663
963
+ 3.678285,43.406606
964
+ 3.67851,43.406778
965
+ 3.678124,43.407479
966
+ 3.676622,43.408493
967
+ 3.676257,43.409974
968
+ 3.670721,43.408126</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
969
+ <Placemark id="UJ4zH">
970
+ <name>Bureau 20</name><ExtendedData></ExtendedData>
971
+ <Polygon>
972
+ <outerBoundaryIs>
973
+ <LinearRing><coordinates>3.690891,43.409194
974
+ 3.690956,43.409194
975
+ 3.691771,43.409872
976
+ 3.692554,43.409459
977
+ 3.693627,43.410387
978
+ 3.69293,43.410698
979
+ 3.692265,43.411454
980
+ 3.69175,43.411376
981
+ 3.691063,43.41147
982
+ 3.690677,43.411408
983
+ 3.69029,43.411454
984
+ 3.690012,43.411415
985
+ 3.689904,43.411579
986
+ 3.689507,43.411618
987
+ 3.689443,43.411486
988
+ 3.68926,43.411571
989
+ 3.6891,43.4114
990
+ 3.689228,43.411314
991
+ 3.689239,43.411135
992
+ 3.688992,43.41108
993
+ 3.688756,43.41087
994
+ 3.688853,43.410831
995
+ 3.688767,43.41062
996
+ 3.688434,43.410605
997
+ 3.688091,43.410246
998
+ 3.688252,43.41002
999
+ 3.689089,43.40956
1000
+ 3.689057,43.409272
1001
+ 3.690548,43.408461
1002
+ 3.69072,43.408563
1003
+ 3.690891,43.409194</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1004
+ <Placemark id="r7MPB">
1005
+ <name>Bureau 21</name><ExtendedData></ExtendedData>
1006
+ <Polygon>
1007
+ <outerBoundaryIs>
1008
+ <LinearRing><coordinates>3.688402,43.410605
1009
+ 3.688767,43.410613
1010
+ 3.688874,43.410823
1011
+ 3.688745,43.41087
1012
+ 3.688982,43.411072
1013
+ 3.68926,43.411111
1014
+ 3.68925,43.411306
1015
+ 3.6891,43.4114
1016
+ 3.689239,43.411563
1017
+ 3.68911,43.411673
1018
+ 3.687887,43.411852
1019
+ 3.687973,43.411665
1020
+ 3.687737,43.411447
1021
+ 3.687544,43.411415
1022
+ 3.687222,43.411026
1023
+ 3.687576,43.410815
1024
+ 3.687383,43.410652
1025
+ 3.687179,43.410706
1026
+ 3.686568,43.410067
1027
+ 3.687469,43.409646
1028
+ 3.687136,43.409303
1029
+ 3.686804,43.409436
1030
+ 3.685892,43.408384
1031
+ 3.685033,43.408797
1032
+ 3.684744,43.408532
1033
+ 3.684937,43.408313
1034
+ 3.685838,43.407892
1035
+ 3.685634,43.407651
1036
+ 3.686514,43.407246
1037
+ 3.686353,43.406996
1038
+ 3.686535,43.406957
1039
+ 3.68646,43.406716
1040
+ 3.686954,43.406591
1041
+ 3.686879,43.40638
1042
+ 3.687758,43.406193
1043
+ 3.688011,43.406337
1044
+ 3.687721,43.406439
1045
+ 3.687807,43.406786
1046
+ 3.688622,43.406864
1047
+ 3.688697,43.406782
1048
+ 3.689121,43.406825
1049
+ 3.689164,43.40672
1050
+ 3.68933,43.40686
1051
+ 3.689314,43.406965
1052
+ 3.689904,43.407004
1053
+ 3.689931,43.40695
1054
+ 3.690049,43.406954
1055
+ 3.690092,43.406802
1056
+ 3.691031,43.406903
1057
+ 3.691246,43.407137
1058
+ 3.69102,43.407414
1059
+ 3.691128,43.407554
1060
+ 3.691074,43.407979
1061
+ 3.690908,43.408037
1062
+ 3.690827,43.408474
1063
+ 3.690988,43.408758
1064
+ 3.690924,43.40921
1065
+ 3.69058,43.408509
1066
+ 3.689046,43.409296
1067
+ 3.6891,43.409576
1068
+ 3.68822,43.41002
1069
+ 3.688091,43.410254
1070
+ 3.688402,43.410605</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1071
+ <Placemark id="8NzmY">
1072
+ <name>Bureau 22</name><ExtendedData></ExtendedData>
1073
+ <Polygon>
1074
+ <outerBoundaryIs>
1075
+ <LinearRing><coordinates>3.685741,43.406965
1076
+ 3.681879,43.407744
1077
+ 3.681686,43.407542
1078
+ 3.681257,43.40762
1079
+ 3.681107,43.407433
1080
+ 3.680592,43.407464
1081
+ 3.680613,43.407183
1082
+ 3.679819,43.407183
1083
+ 3.679132,43.406809
1084
+ 3.678596,43.406762
1085
+ 3.678339,43.406622
1086
+ 3.677609,43.406638
1087
+ 3.676364,43.406217
1088
+ 3.676322,43.406497
1089
+ 3.675978,43.406575
1090
+ 3.676,43.406887
1091
+ 3.675742,43.406903
1092
+ 3.67557,43.407292
1093
+ 3.67321,43.4067
1094
+ 3.67336,43.406295
1095
+ 3.672631,43.405874
1096
+ 3.672867,43.405281
1097
+ 3.671901,43.40497
1098
+ 3.671944,43.404362
1099
+ 3.670914,43.404268
1100
+ 3.670893,43.404065
1101
+ 3.67306,43.404362
1102
+ 3.673146,43.40405
1103
+ 3.672566,43.403987
1104
+ 3.672631,43.403785
1105
+ 3.673339,43.403925
1106
+ 3.673189,43.404315
1107
+ 3.674433,43.40433
1108
+ 3.674476,43.403987
1109
+ 3.674669,43.404315
1110
+ 3.67836,43.404143
1111
+ 3.678725,43.404377
1112
+ 3.678972,43.404291
1113
+ 3.679036,43.404393
1114
+ 3.679283,43.404323
1115
+ 3.67925,43.403956
1116
+ 3.67954,43.40419
1117
+ 3.680667,43.403941
1118
+ 3.681611,43.403754
1119
+ 3.682029,43.403582
1120
+ 3.682523,43.403426
1121
+ 3.682265,43.403036
1122
+ 3.682415,43.40299
1123
+ 3.682662,43.403379
1124
+ 3.683628,43.402982
1125
+ 3.683488,43.402514
1126
+ 3.683703,43.402982
1127
+ 3.683767,43.40306
1128
+ 3.683639,43.403699
1129
+ 3.684475,43.403574
1130
+ 3.685623,43.40253
1131
+ 3.685656,43.402584
1132
+ 3.684551,43.403598
1133
+ 3.685795,43.404026
1134
+ 3.686053,43.403582
1135
+ 3.686997,43.403208
1136
+ 3.687683,43.402951
1137
+ 3.688059,43.40299
1138
+ 3.687555,43.40352
1139
+ 3.686321,43.403754
1140
+ 3.686106,43.404112
1141
+ 3.686192,43.404346
1142
+ 3.685613,43.40486
1143
+ 3.685859,43.405258
1144
+ 3.685377,43.405375
1145
+ 3.685119,43.405889
1146
+ 3.685741,43.406497
1147
+ 3.686557,43.406404
1148
+ 3.685741,43.406965</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1149
+ <Placemark id="7iQ5D">
1150
+ <name>Bureau 23</name><ExtendedData></ExtendedData>
1151
+ <Polygon>
1152
+ <outerBoundaryIs>
1153
+ <LinearRing><coordinates>3.682984,43.413442
1154
+ 3.680667,43.413987
1155
+ 3.679122,43.41426
1156
+ 3.677469,43.414377
1157
+ 3.676718,43.413831
1158
+ 3.67704,43.413442
1159
+ 3.67733,43.413582
1160
+ 3.677588,43.413247
1161
+ 3.677341,43.413099
1162
+ 3.677802,43.412514
1163
+ 3.677974,43.4126
1164
+ 3.678832,43.411992
1165
+ 3.679068,43.412109
1166
+ 3.67924,43.411587
1167
+ 3.678886,43.411462
1168
+ 3.679014,43.411298
1169
+ 3.67887,43.411248
1170
+ 3.679004,43.411104
1171
+ 3.679808,43.411088
1172
+ 3.681117,43.411587
1173
+ 3.681332,43.411369
1174
+ 3.681332,43.411033
1175
+ 3.681686,43.411174
1176
+ 3.681718,43.411088
1177
+ 3.681514,43.41101
1178
+ 3.681718,43.410706
1179
+ 3.681096,43.410457
1180
+ 3.681332,43.410028
1181
+ 3.682834,43.410036
1182
+ 3.683102,43.410231
1183
+ 3.683295,43.410332
1184
+ 3.683982,43.410129
1185
+ 3.683692,43.40988
1186
+ 3.6841,43.409615
1187
+ 3.684025,43.409576
1188
+ 3.68439,43.409334
1189
+ 3.685452,43.410449
1190
+ 3.685269,43.41055
1191
+ 3.685473,43.410792
1192
+ 3.684443,43.411275
1193
+ 3.684529,43.411727
1194
+ 3.684787,43.411899
1195
+ 3.684078,43.412148
1196
+ 3.684422,43.413013
1197
+ 3.682984,43.413442</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1198
+ <Placemark id="oQc1Y">
1199
+ <name>Bureau 24</name><ExtendedData>
1200
+ <Data name="_umap_options"><value>{"color":"Chartreuse"}</value></Data></ExtendedData>
1201
+ <Polygon>
1202
+ <outerBoundaryIs>
1203
+ <LinearRing><coordinates>3.664713,43.417081
1204
+ 3.657546,43.412093
1205
+ 3.659241,43.410784
1206
+ 3.661923,43.410161
1207
+ 3.662347,43.410523
1208
+ 3.662771,43.411505
1209
+ 3.661956,43.412027
1210
+ 3.661301,43.411614
1211
+ 3.660829,43.411969
1212
+ 3.662642,43.413208
1213
+ 3.662642,43.413489
1214
+ 3.662975,43.413738
1215
+ 3.663232,43.413668
1216
+ 3.66364,43.413917
1217
+ 3.663608,43.414151
1218
+ 3.663844,43.414291
1219
+ 3.664273,43.414369
1220
+ 3.66555,43.415289
1221
+ 3.665679,43.415164
1222
+ 3.665979,43.415343
1223
+ 3.665807,43.415484
1224
+ 3.666322,43.415834
1225
+ 3.664713,43.417081</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1226
+ <Placemark id="yDXRa">
1227
+ <name>Bureau 25</name><ExtendedData></ExtendedData>
1228
+ <Polygon>
1229
+ <outerBoundaryIs>
1230
+ <LinearRing><coordinates>3.66982,43.419637
1231
+ 3.671257,43.420557
1232
+ 3.674004,43.423518
1233
+ 3.674026,43.423923
1234
+ 3.675549,43.424422
1235
+ 3.6763,43.423658
1236
+ 3.677244,43.423658
1237
+ 3.677888,43.421913
1238
+ 3.677094,43.420962
1239
+ 3.673983,43.418842
1240
+ 3.673768,43.417175
1241
+ 3.673961,43.416255
1242
+ 3.674583,43.415647
1243
+ 3.675302,43.415141
1244
+ 3.676461,43.414696
1245
+ 3.6791,43.41447
1246
+ 3.681911,43.413925
1247
+ 3.682501,43.413956
1248
+ 3.682823,43.414057
1249
+ 3.683242,43.413379
1250
+ 3.680677,43.413995
1251
+ 3.679068,43.414276
1252
+ 3.677502,43.414361
1253
+ 3.676611,43.41447
1254
+ 3.675356,43.414455
1255
+ 3.674852,43.414494
1256
+ 3.67483,43.414572
1257
+ 3.674465,43.414556
1258
+ 3.674423,43.414665
1259
+ 3.67395,43.414743
1260
+ 3.673382,43.415468
1261
+ 3.673736,43.415624
1262
+ 3.673521,43.41592
1263
+ 3.673221,43.415756
1264
+ 3.673081,43.41592
1265
+ 3.672695,43.415717
1266
+ 3.672266,43.416232
1267
+ 3.67292,43.416598
1268
+ 3.672792,43.416801
1269
+ 3.672588,43.416692
1270
+ 3.671719,43.417728
1271
+ 3.671955,43.417853
1272
+ 3.671268,43.418648
1273
+ 3.670903,43.418492
1274
+ 3.67071,43.41871
1275
+ 3.669788,43.418157
1276
+ 3.669938,43.418001
1277
+ 3.669809,43.417907
1278
+ 3.669423,43.418281
1279
+ 3.669863,43.418601
1280
+ 3.670163,43.418796
1281
+ 3.670067,43.418905
1282
+ 3.670238,43.418967
1283
+ 3.670152,43.419076
1284
+ 3.669959,43.418991
1285
+ 3.669884,43.419084
1286
+ 3.66968,43.418975
1287
+ 3.66938,43.4191
1288
+ 3.669262,43.419045
1289
+ 3.669273,43.418702
1290
+ 3.668908,43.418788
1291
+ 3.668575,43.419232
1292
+ 3.668897,43.419559
1293
+ 3.66982,43.419637</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1294
+ <Placemark id="roqV8">
1295
+ <name>Bureau 26</name><ExtendedData>
1296
+ <Data name="_umap_options"><value>{"color":"Crimson"}</value></Data></ExtendedData>
1297
+ <Polygon>
1298
+ <outerBoundaryIs>
1299
+ <LinearRing><coordinates>3.662868,43.410254
1300
+ 3.665528,43.412171
1301
+ 3.665496,43.412771
1302
+ 3.667234,43.414057
1303
+ 3.667331,43.415647
1304
+ 3.666998,43.415749
1305
+ 3.666215,43.415187
1306
+ 3.666033,43.415297
1307
+ 3.665775,43.415071
1308
+ 3.666022,43.414891
1309
+ 3.665282,43.414385
1310
+ 3.665013,43.414548
1311
+ 3.664702,43.414322
1312
+ 3.664906,43.414167
1313
+ 3.664713,43.413987
1314
+ 3.664262,43.414346
1315
+ 3.663844,43.414307
1316
+ 3.663597,43.414167
1317
+ 3.663629,43.413902
1318
+ 3.663222,43.413652
1319
+ 3.662964,43.41373
1320
+ 3.662642,43.413504
1321
+ 3.662642,43.413224
1322
+ 3.663189,43.412865
1323
+ 3.662503,43.412382
1324
+ 3.662213,43.412577
1325
+ 3.66172,43.412203
1326
+ 3.662792,43.41147
1327
+ 3.662353,43.410496
1328
+ 3.662868,43.410254</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1329
+ <Placemark id="6YOqE">
1330
+ <name>Bureau 27</name><ExtendedData></ExtendedData>
1331
+ <Polygon>
1332
+ <outerBoundaryIs>
1333
+ <LinearRing><coordinates>3.674819,43.414556
1334
+ 3.674476,43.414548
1335
+ 3.67439,43.414681
1336
+ 3.673961,43.414743
1337
+ 3.67336,43.415468
1338
+ 3.673725,43.415639
1339
+ 3.673511,43.415889
1340
+ 3.673221,43.415764
1341
+ 3.673092,43.415904
1342
+ 3.672663,43.415733
1343
+ 3.672255,43.416208
1344
+ 3.67292,43.416614
1345
+ 3.672781,43.416801
1346
+ 3.672609,43.416707
1347
+ 3.67173,43.417728
1348
+ 3.671933,43.417861
1349
+ 3.671247,43.418632
1350
+ 3.670914,43.418492
1351
+ 3.6707,43.418694
1352
+ 3.669755,43.418164
1353
+ 3.669949,43.418009
1354
+ 3.66982,43.417923
1355
+ 3.669401,43.418274
1356
+ 3.670152,43.418788
1357
+ 3.670056,43.418913
1358
+ 3.670228,43.418944
1359
+ 3.670131,43.419053
1360
+ 3.669873,43.419076
1361
+ 3.669659,43.418967
1362
+ 3.669348,43.419107
1363
+ 3.669262,43.419045
1364
+ 3.669219,43.41871
1365
+ 3.668865,43.418803
1366
+ 3.668565,43.419193
1367
+ 3.667953,43.418788
1368
+ 3.667803,43.41878
1369
+ 3.667427,43.419022
1370
+ 3.666569,43.418546
1371
+ 3.666676,43.418507
1372
+ 3.667438,43.41892
1373
+ 3.667749,43.418702
1374
+ 3.666655,43.418016
1375
+ 3.666462,43.41818
1376
+ 3.666258,43.418445
1377
+ 3.666097,43.418991
1378
+ 3.66599,43.418959
1379
+ 3.666011,43.418616
1380
+ 3.666451,43.417884
1381
+ 3.666054,43.417198
1382
+ 3.665839,43.417214
1383
+ 3.665679,43.417011
1384
+ 3.665968,43.416863
1385
+ 3.665979,43.41652
1386
+ 3.666869,43.415889
1387
+ 3.667556,43.415562
1388
+ 3.667449,43.414112
1389
+ 3.666043,43.412725
1390
+ 3.665646,43.411977
1391
+ 3.664573,43.411166
1392
+ 3.663576,43.410114
1393
+ 3.663136,43.40967
1394
+ 3.662653,43.409553
1395
+ 3.661956,43.410067
1396
+ 3.661784,43.410059
1397
+ 3.662417,43.409459
1398
+ 3.663211,43.408906
1399
+ 3.663919,43.409747
1400
+ 3.665314,43.408851
1401
+ 3.665861,43.40928
1402
+ 3.665872,43.409529
1403
+ 3.666215,43.40981
1404
+ 3.666569,43.409825
1405
+ 3.667063,43.410184
1406
+ 3.667213,43.410371
1407
+ 3.666773,43.410667
1408
+ 3.666869,43.411041
1409
+ 3.667234,43.411096
1410
+ 3.667449,43.410979
1411
+ 3.667631,43.411096
1412
+ 3.667846,43.410956
1413
+ 3.668232,43.41126
1414
+ 3.668586,43.411143
1415
+ 3.669155,43.411797
1416
+ 3.668371,43.412319
1417
+ 3.668844,43.412538
1418
+ 3.670346,43.411462
1419
+ 3.670689,43.411696
1420
+ 3.671504,43.411096
1421
+ 3.672287,43.411938
1422
+ 3.671676,43.412351
1423
+ 3.67203,43.4126
1424
+ 3.671547,43.41292
1425
+ 3.672287,43.413551
1426
+ 3.671547,43.414034
1427
+ 3.673725,43.4144
1428
+ 3.674873,43.414502
1429
+ 3.674819,43.414556</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1430
+ <Placemark id="7aZin">
1431
+ <name>Bureau 28</name><ExtendedData></ExtendedData>
1432
+ <Polygon>
1433
+ <outerBoundaryIs>
1434
+ <LinearRing><coordinates>3.686471,43.406723
1435
+ 3.686535,43.406942
1436
+ 3.686374,43.407004
1437
+ 3.686503,43.407238
1438
+ 3.685634,43.407659
1439
+ 3.685817,43.407877
1440
+ 3.684947,43.408298
1441
+ 3.684379,43.408937
1442
+ 3.684658,43.408914
1443
+ 3.684754,43.409069
1444
+ 3.684626,43.409171
1445
+ 3.684036,43.409576
1446
+ 3.6841,43.409615
1447
+ 3.683692,43.409888
1448
+ 3.683968,43.410129
1449
+ 3.683298,43.410332
1450
+ 3.683099,43.410229
1451
+ 3.682845,43.410028
1452
+ 3.6813,43.41002
1453
+ 3.681107,43.410449
1454
+ 3.681707,43.410722
1455
+ 3.681525,43.411014
1456
+ 3.681718,43.411088
1457
+ 3.681702,43.41117
1458
+ 3.681327,43.411037
1459
+ 3.681321,43.411388
1460
+ 3.681123,43.411595
1461
+ 3.679808,43.411082
1462
+ 3.678998,43.411107
1463
+ 3.678886,43.4108
1464
+ 3.678435,43.410648
1465
+ 3.678373,43.410515
1466
+ 3.678854,43.40852
1467
+ 3.679132,43.407791
1468
+ 3.678145,43.407526
1469
+ 3.678532,43.406778
1470
+ 3.679111,43.40684
1471
+ 3.679755,43.407168
1472
+ 3.680592,43.407183
1473
+ 3.680592,43.407479
1474
+ 3.681085,43.407433
1475
+ 3.681171,43.407666
1476
+ 3.681686,43.407573
1477
+ 3.681772,43.407776
1478
+ 3.68572,43.406949
1479
+ 3.686637,43.40638
1480
+ 3.686868,43.406334
1481
+ 3.686932,43.40656
1482
+ 3.686471,43.406723</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1483
+ <Placemark id="sJOtO">
1484
+ <name>Bureau 29</name><ExtendedData></ExtendedData>
1485
+ <Polygon>
1486
+ <outerBoundaryIs>
1487
+ <LinearRing><coordinates>3.663919,43.40327
1488
+ 3.663822,43.403122
1489
+ 3.663125,43.403177
1490
+ 3.663114,43.403528
1491
+ 3.662417,43.403621
1492
+ 3.662524,43.403933
1493
+ 3.66305,43.404151
1494
+ 3.664252,43.405406
1495
+ 3.663973,43.405594
1496
+ 3.664099,43.405701
1497
+ 3.664364,43.405497
1498
+ 3.664551,43.405593
1499
+ 3.664262,43.405821
1500
+ 3.664352,43.405895
1501
+ 3.664627,43.405648
1502
+ 3.665131,43.405975
1503
+ 3.664927,43.406232
1504
+ 3.664445,43.405927
1505
+ 3.664262,43.406035
1506
+ 3.664327,43.4067
1507
+ 3.66246,43.407807
1508
+ 3.663232,43.408898
1509
+ 3.661752,43.409989
1510
+ 3.656774,43.402787
1511
+ 3.658136,43.401399
1512
+ 3.658544,43.401173
1513
+ 3.659145,43.40048
1514
+ 3.659037,43.39995
1515
+ 3.659649,43.398819
1516
+ 3.660765,43.399092
1517
+ 3.662041,43.398009
1518
+ 3.662835,43.398866
1519
+ 3.663565,43.399396
1520
+ 3.664391,43.398999
1521
+ 3.664541,43.399092
1522
+ 3.665067,43.398858
1523
+ 3.665024,43.398718
1524
+ 3.66541,43.39864
1525
+ 3.66555,43.398819
1526
+ 3.665496,43.398601
1527
+ 3.665915,43.398437
1528
+ 3.666086,43.398663
1529
+ 3.667202,43.398281
1530
+ 3.667728,43.398663
1531
+ 3.665593,43.39903
1532
+ 3.664863,43.399326
1533
+ 3.66438,43.399669
1534
+ 3.66497,43.400332
1535
+ 3.665646,43.400137
1536
+ 3.666354,43.400425
1537
+ 3.666934,43.400441
1538
+ 3.666998,43.401033
1539
+ 3.667363,43.401368
1540
+ 3.667728,43.401407
1541
+ 3.668028,43.401758
1542
+ 3.669251,43.402725
1543
+ 3.670592,43.403629
1544
+ 3.669852,43.40366
1545
+ 3.667331,43.403489
1546
+ 3.663919,43.40327</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1547
+ <Placemark id="4Xq6M">
1548
+ <name>Bureau 30</name><ExtendedData></ExtendedData>
1549
+ <Polygon>
1550
+ <outerBoundaryIs>
1551
+ <LinearRing><coordinates>3.684443,43.413021
1552
+ 3.684068,43.41214
1553
+ 3.684829,43.411899
1554
+ 3.68454,43.411735
1555
+ 3.684454,43.411275
1556
+ 3.685484,43.410792
1557
+ 3.685259,43.41055
1558
+ 3.685462,43.410449
1559
+ 3.684422,43.409342
1560
+ 3.684744,43.409062
1561
+ 3.684669,43.408906
1562
+ 3.684497,43.409038
1563
+ 3.68439,43.408937
1564
+ 3.684733,43.4085
1565
+ 3.685033,43.408804
1566
+ 3.685881,43.408376
1567
+ 3.686535,43.409155
1568
+ 3.686804,43.409451
1569
+ 3.687136,43.409295
1570
+ 3.687479,43.409646
1571
+ 3.68661,43.410067
1572
+ 3.687201,43.410737
1573
+ 3.687394,43.410652
1574
+ 3.687587,43.410839
1575
+ 3.687211,43.411026
1576
+ 3.687544,43.411408
1577
+ 3.687748,43.411454
1578
+ 3.687898,43.411556
1579
+ 3.687994,43.411665
1580
+ 3.687887,43.411844
1581
+ 3.688155,43.411817
1582
+ 3.68852,43.411762
1583
+ 3.688901,43.4117
1584
+ 3.689126,43.411661
1585
+ 3.689448,43.411474
1586
+ 3.689518,43.411618
1587
+ 3.689904,43.411571
1588
+ 3.690012,43.411404
1589
+ 3.69029,43.41147
1590
+ 3.690687,43.411384
1591
+ 3.691117,43.411478
1592
+ 3.691771,43.411369
1593
+ 3.692232,43.41147
1594
+ 3.693091,43.411743
1595
+ 3.692704,43.412156
1596
+ 3.692758,43.412865
1597
+ 3.693048,43.413255
1598
+ 3.696095,43.413987
1599
+ 3.695741,43.414237
1600
+ 3.694711,43.414065
1601
+ 3.694614,43.414213
1602
+ 3.694528,43.414213
1603
+ 3.694571,43.414026
1604
+ 3.693391,43.413839
1605
+ 3.692833,43.414455
1606
+ 3.690902,43.414572
1607
+ 3.690269,43.414213
1608
+ 3.689378,43.414167
1609
+ 3.688681,43.414696
1610
+ 3.688198,43.414595
1611
+ 3.688005,43.414829
1612
+ 3.687726,43.414743
1613
+ 3.688027,43.414439
1614
+ 3.687426,43.414198
1615
+ 3.687072,43.41352
1616
+ 3.686428,43.413231
1617
+ 3.686213,43.413411
1618
+ 3.686138,43.413372
1619
+ 3.686235,43.413231
1620
+ 3.685516,43.413052
1621
+ 3.684754,43.413816
1622
+ 3.684347,43.414057
1623
+ 3.683639,43.414151
1624
+ 3.682855,43.414042
1625
+ 3.683231,43.413379
1626
+ 3.684443,43.413021</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1627
+ <Placemark id="oaH2G">
1628
+ <name>Bureau 31</name><ExtendedData>
1629
+ <Data name="_umap_options"><value>{"color":"BlueViolet"}</value></Data></ExtendedData>
1630
+ <Polygon>
1631
+ <outerBoundaryIs>
1632
+ <LinearRing><coordinates>3.648233,43.397627
1633
+ 3.649971,43.398546
1634
+ 3.654606,43.400402
1635
+ 3.656645,43.402023
1636
+ 3.656752,43.40235
1637
+ 3.654971,43.404143
1638
+ 3.659145,43.400472
1639
+ 3.659027,43.399942
1640
+ 3.659638,43.398811
1641
+ 3.660754,43.399069
1642
+ 3.66202,43.398001
1643
+ 3.661805,43.397829
1644
+ 3.661602,43.397907
1645
+ 3.660979,43.397829
1646
+ 3.661333,43.397315
1647
+ 3.660979,43.396816
1648
+ 3.659091,43.395865
1649
+ 3.659177,43.395787
1650
+ 3.658254,43.395319
1651
+ 3.658447,43.394836
1652
+ 3.659155,43.395194
1653
+ 3.660336,43.393931
1654
+ 3.659477,43.393666
1655
+ 3.659756,43.393354
1656
+ 3.659155,43.39312
1657
+ 3.659327,43.392637
1658
+ 3.660979,43.393261
1659
+ 3.662868,43.392544
1660
+ 3.662782,43.392138
1661
+ 3.661816,43.391795
1662
+ 3.659906,43.391296
1663
+ 3.659542,43.390205
1664
+ 3.657911,43.38955
1665
+ 3.655765,43.389643
1666
+ 3.653941,43.389565
1667
+ 3.65422,43.389175
1668
+ 3.653812,43.389394
1669
+ 3.648856,43.38707
1670
+ 3.643341,43.392045
1671
+ 3.648233,43.397627</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1672
+ <Placemark id="h0wD5">
1673
+ <name>Bureau 32</name><ExtendedData></ExtendedData>
1674
+ <Polygon>
1675
+ <outerBoundaryIs>
1676
+ <LinearRing><coordinates>3.671579,43.412951
1677
+ 3.671987,43.412592
1678
+ 3.671708,43.412358
1679
+ 3.672309,43.411938
1680
+ 3.671515,43.41108
1681
+ 3.6707,43.411688
1682
+ 3.670399,43.411439
1683
+ 3.669444,43.412117
1684
+ 3.668844,43.41253
1685
+ 3.668329,43.412312
1686
+ 3.669155,43.411797
1687
+ 3.668575,43.411143
1688
+ 3.668221,43.411228
1689
+ 3.667835,43.410971
1690
+ 3.66762,43.41108
1691
+ 3.667427,43.410979
1692
+ 3.667223,43.411088
1693
+ 3.666848,43.411018
1694
+ 3.666751,43.410659
1695
+ 3.667191,43.410379
1696
+ 3.667052,43.410184
1697
+ 3.666537,43.40981
1698
+ 3.666204,43.409802
1699
+ 3.665882,43.409537
1700
+ 3.665839,43.409264
1701
+ 3.665335,43.408836
1702
+ 3.665646,43.408641
1703
+ 3.665957,43.408914
1704
+ 3.667084,43.408204
1705
+ 3.667556,43.408555
1706
+ 3.668103,43.40822
1707
+ 3.66865,43.407355
1708
+ 3.672835,43.408875
1709
+ 3.675238,43.409716
1710
+ 3.677652,43.41027
1711
+ 3.677566,43.410558
1712
+ 3.677094,43.410566
1713
+ 3.67615,43.411127
1714
+ 3.67557,43.410768
1715
+ 3.674841,43.41108
1716
+ 3.675377,43.411751
1717
+ 3.672287,43.413543
1718
+ 3.671579,43.412951</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
1719
+ <Placemark id="ydMJC">
1720
+ <name>Bureau 33</name><ExtendedData></ExtendedData>
1721
+ <Polygon>
1722
+ <outerBoundaryIs>
1723
+ <LinearRing><coordinates>3.678371,43.410519
1724
+ 3.678435,43.410652
1725
+ 3.678875,43.410792
1726
+ 3.679004,43.411119
1727
+ 3.678864,43.411244
1728
+ 3.679013,43.411298
1729
+ 3.678887,43.411465
1730
+ 3.679237,43.411591
1731
+ 3.679047,43.412125
1732
+ 3.678778,43.412008
1733
+ 3.677942,43.412623
1734
+ 3.677802,43.412545
1735
+ 3.677309,43.413083
1736
+ 3.677545,43.41327
1737
+ 3.677298,43.41359
1738
+ 3.67703,43.413481
1739
+ 3.676718,43.413839
1740
+ 3.677405,43.414361
1741
+ 3.6766,43.41447
1742
+ 3.675431,43.414447
1743
+ 3.674852,43.414486
1744
+ 3.673768,43.414408
1745
+ 3.671569,43.41405
1746
+ 3.672266,43.413582
1747
+ 3.675452,43.411727
1748
+ 3.67483,43.411104
1749
+ 3.67557,43.410761
1750
+ 3.67601,43.411057
1751
+ 3.676139,43.411135
1752
+ 3.677083,43.410589
1753
+ 3.677545,43.410558
1754
+ 3.677673,43.410301
1755
+ 3.676279,43.409958
1756
+ 3.676622,43.408532
1757
+ 3.678049,43.407464
1758
+ 3.678424,43.407643
1759
+ 3.679143,43.407721
1760
+ 3.678854,43.408508
1761
+ 3.678575,43.409716
1762
+ 3.678371,43.410519</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark></Document></kml>
data/interim/elections_long.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70fc51d5dd8303c51339a95f818198ba0cc5f26e2a3dc951eae664eb8953a54d
3
+ size 2216814
data/mapping_candidats_blocs.csv ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ code_candidature;nom_candidature;bloc_1;bloc_2;bloc_3
2
+ NC;Nuance non communiquee;centre;;
3
+ LDIV;Divers;centre;;
4
+ DIV;Divers;centre;;
5
+ LDVD;Divers droite;droite_modere;droite_dure
6
+ LDVG;Divers gauche;gauche_modere;
7
+ LUG;Union de la gauche;;gauche_modere
8
+ LUD;Union de la droite;droite;droite_modere
9
+ LFN;Front national;extreme_droite;;
10
+ LEXG;Extreme gauche;extreme_gauche;;
11
+ LSOC;Parti socialiste;gauche_modere;
12
+ LUMP;Union pour un mouvement populaire;droite_modere;
13
+ LNC;Nouveau centre;centre;;
14
+ LFG;Front de gauche;gauche_dure;
15
+ LVEC;Europe Ecologie Les Verts;gauche_modere;;
16
+ LUDI;Union des democrates et independants;centre;droite_modere
17
+ LDVC;Divers centre;centre;;
18
+ LCOM;Parti communiste;gauche_dure;
19
+ LRN;Rassemblement national;extreme_droite;;
20
+ LUC;Union du centre;centre;;
21
+ LPG;Parti de gauche;gauche_dure;
22
+ LMDM;Mouvement democrate;centre;;
23
+ LLR;Les republicains;droite_modere;
24
+ LEXD;Extreme droite;extreme_droite;;
25
+ LREM;La republique en marche;centre;droite_modere
26
+ LFI;La France insoumise;gauche_dure;;
27
+ LECO;Ecologistes;gauche_modere;;
28
+ LREG;Regionalistes;centre;;
29
+ LGJ;Gilets jaunes;;
30
+ LRDG;Radicaux de gauche;gauche_modere;centre
31
+ LDLF;Debout la France;droite_dure;
32
+ RN;Rassemblement national;extreme_droite;;
33
+ LR;Les republicains;droite_modere;centre
34
+ EELV;Europe Ecologie Les Verts;gauche_modere;;
35
+ PS;Parti socialiste;gauche_modere;;
36
+ UDI;Union des democrates et independants;centre;droite_modere
37
+ PRG;Parti radical de gauche;gauche_modere;centre
38
+ DVD;Divers droite;droite_modere;droite_dure
39
+ DVG;Divers gauche;gauche_modere;
40
+ EXD;Extreme droite;extreme_droite;;
41
+ EXG;Extreme gauche;extreme_gauche;;
42
+ FN;Front national;extreme_droite;;
43
+ DLF;Debout la France;droite_dure;
44
+ REM;La republique en marche;centre;droite_modere
45
+ ENS;Ensemble;centre;droite_modere
46
+ LENS;Ensemble;centre;droite_modere
47
+ REC;Reconquete;extreme_droite;;
48
+ LREC;Reconquete;extreme_droite;;
49
+ DSV;Divers souverainiste;droite_dure;
50
+ LDSV;Divers souverainiste;droite_dure;
51
+ LUGE;Union de la gauche elargie;gauche_modere;
52
+ COM;Parti communiste;gauche_dure;
53
+ SOC;Parti socialiste;gauche_modere;;
54
+ FI;La France insoumise;gauche_dure;;
55
+ ECO;Ecologistes;gauche_modere;;
56
+ DXG;Divers extreme gauche;extreme_gauche;;
57
+ NUP;Nupes;gauche_dure;gauche_modere
58
+ BC-COM;Binome communiste;gauche_dure;
59
+ BC-DVD;Binome divers droite;droite_modere;droite_dure
60
+ BC-ECO;Binome ecologiste;gauche_modere;;
61
+ BC-RN;Binome rassemblement national;extreme_droite;;
data/mappings/category_mapping.csv ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ code_candidature;nom_candidature;bloc_1;bloc_2;bloc_3
2
+ NC;Nuance non communiquee;centre;;
3
+ LDIV;Divers;centre;;
4
+ DIV;Divers;centre;;
5
+ LDVD;Divers droite;droite_modere;droite_dure
6
+ LDVG;Divers gauche;gauche_modere;gauche
7
+ LUG;Union de la gauche;gauche;gauche_modere
8
+ LUD;Union de la droite;droite;droite_modere
9
+ LFN;Front national;extreme_droite;;
10
+ LEXG;Extreme gauche;extreme_gauche;;
11
+ LSOC;Parti socialiste;gauche_modere;gauche
12
+ LUMP;Union pour un mouvement populaire;droite_modere;droite
13
+ LNC;Nouveau centre;centre;;
14
+ LFG;Front de gauche;gauche_dure;gauche
15
+ LVEC;Europe Ecologie Les Verts;gauche_modere;;
16
+ LUDI;Union des democrates et independants;centre;droite_modere
17
+ LDVC;Divers centre;centre;;
18
+ LCOM;Parti communiste;gauche_dure;gauche
19
+ LRN;Rassemblement national;extreme_droite;;
20
+ LUC;Union du centre;centre;;
21
+ LPG;Parti de gauche;gauche_dure;gauche
22
+ LMDM;Mouvement democrate;centre;;
23
+ LLR;Les republicains;droite_modere;droite
24
+ LEXD;Extreme droite;extreme_droite;;
25
+ LREM;La republique en marche;centre;droite_modere
26
+ LFI;La France insoumise;gauche_dure;;
27
+ LECO;Ecologistes;gauche_modere;;
28
+ LREG;Regionalistes;centre;;
29
+ LGJ;Gilets jaunes;gauche;droite
30
+ LRDG;Radicaux de gauche;gauche_modere;centre
31
+ LDLF;Debout la France;droite_dure;droite
32
+ RN;Rassemblement national;extreme_droite;;
33
+ LR;Les republicains;droite_modere;centre
34
+ EELV;Europe Ecologie Les Verts;gauche_modere;;
35
+ PS;Parti socialiste;gauche_modere;;
36
+ UDI;Union des democrates et independants;centre;droite_modere
37
+ PRG;Parti radical de gauche;gauche_modere;centre
38
+ DVD;Divers droite;droite_modere;droite_dure
39
+ DVG;Divers gauche;gauche_modere;gauche
docker-compose.yml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: "3.9"
2
+
3
+ services:
4
+ postgres:
5
+ image: postgres:16
6
+ container_name: elections_postgres
7
+ restart: unless-stopped
8
+ env_file: .env
9
+ environment:
10
+ - POSTGRES_USER=${POSTGRES_USER}
11
+ - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
12
+ - POSTGRES_DB=${POSTGRES_DB}
13
+ ports:
14
+ - "${POSTGRES_PORT:-5432}:5432"
15
+ volumes:
16
+ - pgdata:/var/lib/postgresql/data
17
+
18
+ pgadmin:
19
+ image: dpage/pgadmin4:8
20
+ container_name: elections_pgadmin
21
+ restart: unless-stopped
22
+ depends_on:
23
+ - postgres
24
+ env_file: .env
25
+ environment:
26
+ PGADMIN_DEFAULT_EMAIL: admin@sete.fr
27
+ PGADMIN_DEFAULT_PASSWORD: admin
28
+ PGADMIN_LISTEN_PORT: 8080
29
+ ports:
30
+ - "8080:8080"
31
+ volumes:
32
+ - pgadmin_data:/var/lib/pgadmin
33
+ profiles:
34
+ - admin
35
+
36
+ volumes:
37
+ pgdata:
38
+ pgadmin_data:
harmoniser.md ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ % Vot/Ins -> % Votants
2
+ Code du d°partement -> Code du département
3
+
4
+ Exprim°s -> Exprim°s 1 -> Exprimés
5
+ Libell° Abr°g° Liste 1, Libellé abrégé de liste 1 -> Libellé Abrégé Liste 1
6
+
7
+ Libell° Etendu Liste 1, Liste, Libellé de liste 1, Liste.1 -> Libellé Etendu Liste 1
8
+
9
+ Libell° de la circonscription, Libellé de la circonscription
10
+
11
+ Libell° de la commune, Libellé commune -> Libellé de la commune
12
+
13
+ Libell° du d°partement, Libellé département -> Libellé du département
14
+
15
+ Nom candidat 1, Nom Tête de Liste 1, Nom T°te de Liste 1, Nom.1 -> Nom 1
16
+
17
+ Pr°nom du candidat 1, Pr°nom du candidat t°te de liste, Pr°nom.1 -> Prénom 1
18
+
19
+ N°Panneau 1, N.Pan. 1 -> N°Panneau 1
main.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import subprocess
5
+ import sys
6
+ from pathlib import Path
7
+
8
+
9
+ PROJECT_ROOT = Path(__file__).resolve().parent
10
+ PYTHON = sys.executable
11
+
12
+
13
def run_step(cmd: list[str], desc: str) -> None:
    """Run one pipeline step as a subprocess, aborting the pipeline on failure.

    Prints a banner with *desc*, executes *cmd*, and raises SystemExit with a
    descriptive French message when the command exits with a non-zero status.
    """
    print(f"\n=== {desc} ===")
    # check=False: we want our own error message instead of CalledProcessError.
    returncode = subprocess.run(cmd, check=False).returncode
    if returncode == 0:
        return
    raise SystemExit(f"Echec de l'étape '{desc}' (code {returncode}). Commande: {' '.join(cmd)}")
18
+
19
+
20
def main() -> None:
    """Command-line entry point: chain preprocess -> features -> train -> predict.

    Each stage can be skipped individually via its --skip-* flag; any stage
    failure aborts the whole pipeline (see run_step).
    """
    parser = argparse.ArgumentParser(
        description="Pipeline orchestration: preprocess -> features -> train -> predict",
    )
    # Input / configuration options.
    parser.add_argument("--raw-dir", type=Path, default=Path("data/raw"), help="Répertoire des fichiers bruts.")
    parser.add_argument("--mapping", type=Path, default=Path("config/nuances.yaml"), help="Mapping nuances->catégories.")
    parser.add_argument("--target-election", type=str, default="municipales", help="Election cible (ex: municipales).")
    parser.add_argument("--target-year", type=int, default=2026, help="Année cible.")
    parser.add_argument("--commune-code", type=str, default="301", help="Code commune pour la prédiction (Sète=301).")
    # Stage toggles.
    parser.add_argument("--skip-preprocess", action="store_true", help="Ne pas relancer le prétraitement.")
    parser.add_argument("--skip-features", action="store_true", help="Ne pas reconstruire le panel.")
    parser.add_argument("--skip-train", action="store_true", help="Ne pas réentraîner le modèle.")
    parser.add_argument("--skip-predict", action="store_true", help="Ne pas générer les prédictions CSV.")
    args = parser.parse_args()

    # Canonical artefact locations shared between stages.
    interim_path = PROJECT_ROOT / "data" / "interim" / "elections_long.parquet"
    panel_path = PROJECT_ROOT / "data" / "processed" / "panel.parquet"
    model_path = PROJECT_ROOT / "models" / "hist_gradient_boosting.joblib"

    if not args.skip_preprocess:
        preprocess_cmd = [
            PYTHON, "-m", "src.data.preprocess",
            "--raw-dir", str(args.raw_dir),
            "--output-dir", str(PROJECT_ROOT / "data" / "interim"),
        ]
        run_step(preprocess_cmd, "Prétraitement (format long)")

    if not args.skip_features:
        features_cmd = [
            PYTHON, "-m", "src.features.build_features",
            "--elections-long", str(interim_path),
            "--mapping", str(args.mapping),
            "--output", str(panel_path),
            "--output-csv", str(PROJECT_ROOT / "data" / "processed" / "panel.csv"),
        ]
        run_step(features_cmd, "Construction du panel features+cibles")

    if not args.skip_train:
        train_cmd = [
            PYTHON, "-m", "src.model.train",
            "--panel", str(panel_path),
            "--reports-dir", str(PROJECT_ROOT / "reports"),
            "--models-dir", str(PROJECT_ROOT / "models"),
        ]
        run_step(train_cmd, "Entraînement / évaluation des modèles")

    if not args.skip_predict:
        predict_cmd = [
            PYTHON, "-m", "src.model.predict",
            "--model-path", str(model_path),
            "--feature-columns", str(PROJECT_ROOT / "models" / "feature_columns.json"),
            "--elections-long", str(interim_path),
            "--mapping", str(args.mapping),
            "--target-election-type", args.target_election,
            "--target-year", str(args.target_year),
            "--commune-code", args.commune_code,
            "--output-dir", str(PROJECT_ROOT / "predictions"),
        ]
        run_step(predict_cmd, "Génération des prédictions CSV")

    print("\nPipeline terminé. Lance Gradio avec `python -m app.gradio_app`.")
114
+
115
+
116
# Allow running the orchestrator directly: `python main.py [options]`.
if __name__ == "__main__":
    main()
mission.md ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Mission
2
+
3
+ ## Étape 1
4
+
5
+ Nous créons un pipeline qui consiste à prendre en entrée des dataframes au format csv et qui les intègre dans une base de données.
6
+
7
+ La base de données comprend toujours la liste des bureaux de vote de toute la France et tout nouveau dataframe rajouterait des colonnes.
8
+
9
+ Dans un premier temps, on s'assure que le fichier soit importé et normalisé pour être conforme à la base de données, afin que la fusion puisse se dérouler correctement.
10
+
11
+ Dans un second temps le dataset est fusionné.
12
+
13
+ ## Ancien
14
+
15
+ Tu es OpenAI Codex dans VS Code. Tu travailles dans un repo Python existant contenant des notebooks et des données dans data/raw, data/interim, data/processed. Objectif métier : au cabinet du maire de Sète, construire un outil prédictif des prochaines municipales (ex: 2026) bureau de vote par bureau de vote, basé sur l’historique électoral et une comparaison au national, puis exposer le tout via une application Gradio. Le projet doit rester opérant à long terme pour les échéances futures (pas “codé en dur” uniquement pour 2026).
16
+
17
+ Contexte fonctionnel (à respecter strictement)
18
+
19
+ Commune principale : Sète (outil centré sur Sète). Prévoir configuration pour étendre à d’autres communes ultérieurement (sans casser l’architecture).
20
+
21
+ L’utilisateur de Gradio choisit :
22
+
23
+ un bureau de vote
24
+
25
+ une élection cible à observer (par défaut : municipales 2026, mais l’UI et le backend doivent accepter n’importe quel couple (type, année) présent / futur)
26
+
27
+ Gradio renvoie :
28
+
29
+ le score prédit (%) pour chaque catégorie de candidats
30
+
31
+ entre parenthèses à côté de chaque score, la différence (en points) vs :
32
+
33
+ la dernière élection législative avant l’élection cible (dans le contexte “municipales 2026”, c’est typiquement les législatives les plus récentes avant 2026)
34
+
35
+ les municipales 2020
36
+
37
+ Catégories à utiliser (cibles et affichage) :
38
+
39
+ centre
40
+
41
+ gauche_modere
42
+
43
+ droite_modere
44
+
45
+ gauche_dure
46
+
47
+ droite_dure
48
+
49
+ extreme_gauche
50
+
51
+ extreme_droite
52
+
53
+ Données & notebooks existants
54
+
55
+ Les fichiers 01_pretraitement et 02_feature_engineering existent (notebooks dans notebooks/) et ont déjà fait un premier nettoyage / feature engineering.
56
+
57
+ Étape 1 : vérifier que ces notebooks sont cohérents avec l’objectif final (prédire municipales 2026 + long terme + bureau par bureau + comparaisons national/local), puis industrialiser : extraire la logique dans des modules Python versionnés sous src/.
58
+
59
+ Les datasets bruts sont dans data/raw. data/interim et data/processed sont disponibles et doivent être utilisés si pertinents (ne pas refaire inutilement ce qui existe déjà, mais corriger si c’est incohérent).
60
+
61
+ Exigences méthodologiques non négociables
62
+ 1) Anti-fuite temporelle (time leakage)
63
+
64
+ Pour prédire une élection cible (type, année = T), les features doivent être calculées uniquement avec des données strictement antérieures à T.
65
+
66
+ Interdiction d’utiliser des résultats de l’élection cible dans les features.
67
+
68
+ Les “écarts au national” doivent être calculés uniquement pour des élections antérieures, avec le score national correspondant à ces élections antérieures.
69
+
70
+ La validation doit respecter la causalité (split temporel).
71
+
72
+ 2) Structure des données adaptée (panel)
73
+
74
+ Ne pas rester sur “1 ligne = 1 bureau” wide naïf si cela empêche l’apprentissage.
75
+ Implémenter un dataset panel conceptuellement : 1 ligne = (bureau, election_type, election_year) avec :
76
+
77
+ cibles : parts de voix (%) par catégorie
78
+
79
+ features : historiques laggés, écarts national antérieurs, participation antérieure, etc.
80
+
81
+ 3) Contraintes de sortie
82
+
83
+ Les prédictions sont des % par catégorie :
84
+
85
+ clip à [0, 100]
86
+
87
+ renormaliser pour sommer à 100 (gérer somme=0)
88
+ Alternative bonus : modéliser via log-ratios + softmax, mais renormalisation simple acceptable.
89
+
90
+ Étape 1 — Audit & industrialisation des notebooks
91
+
92
+ Lire et analyser notebooks/01_pretraitement.* et notebooks/02_feature_engineering.*.
93
+
94
+ Produire un diagnostic succinct (dans reports/notebook_audit.md) :
95
+
96
+ quelles tables/colonnes sont produites ?
97
+
98
+ est-ce compatible avec “bureau×élection” ?
99
+
100
+ existe-t-il des risques de leakage ?
101
+
102
+ est-ce centré sur Sète ou multi-communes ?
103
+
104
+ Refactorer en code production :
105
+
106
+ src/data/preprocess.py : chargement, nettoyage, normalisation des identifiants (commune, bureau), harmonisation des colonnes, gestion des tours (si présents).
107
+
108
+ src/features/build_features.py : construction des features “safe” et panel dataset.
109
+
110
+ Scripts CLI : python -m src.data.preprocess ..., python -m src.features.build_features ...
111
+
112
+ Générer (ou régénérer si nécessaire) un dataset final standard :
113
+
114
+ data/processed/panel.parquet
115
+
116
+ et un dictionnaire de données data/processed/data_dictionary.md
117
+
118
+ Étape 2 — Base PostgreSQL pour l’historique (utilisée par Gradio)
119
+
120
+ Construire une base PostgreSQL (docker-compose recommandé) qui stocke l’historique complet et permet de requêter rapidement par bureau.
121
+
122
+ 2.1 Livrables techniques DB
123
+
124
+ docker-compose.yml lançant Postgres + un outil admin optionnel (pgAdmin facultatif).
125
+
126
+ .env.example pour config DB (host, port, user, password, dbname).
127
+
128
+ Schéma SQL (via Alembic OU SQLAlchemy create_all) versionné dans src/db/.
129
+
130
+ 2.2 Modèle de données (proposition minimale à implémenter)
131
+
132
+ Tables conseillées (adapter si nécessaire, mais rester normalisé) :
133
+
134
+ communes : id, name_normalized, insee_code (si dispo)
135
+
136
+ bureaux : id, commune_id, bureau_code, bureau_label (si dispo), UNIQUE(commune_id, bureau_code)
137
+
138
+ elections : id, election_type, election_year, round (nullable), date (nullable), UNIQUE(type, year, round)
139
+
140
+ categories : id, name (les 7 catégories)
141
+
142
+ results_local : id, bureau_id, election_id, category_id, share_pct, votes (nullable), expressed (nullable), turnout_pct (nullable)
143
+
144
+ results_national : id, election_id, category_id, share_pct, votes (nullable), expressed (nullable), turnout_pct (nullable)
145
+
146
+ 2.3 Ingestion / ETL vers Postgres
147
+
148
+ Créer src/db/ingest.py :
149
+
150
+ lit les données depuis data/processed (préféré) sinon reconstruit depuis data/raw via preprocess + features.
151
+
152
+ insère/upsère idempotent :
153
+
154
+ communes, bureaux, elections, categories
155
+
156
+ résultats locaux et nationaux
157
+
158
+ logs clairs + contrôles de cohérence (ex: somme des parts ≈ 100, votes ≤ exprimés, etc.)
159
+
160
+ script CLI : python -m src.db.ingest --input data/processed/panel.parquet
161
+
162
+ Étape 3 — Modélisation & prédiction
163
+
164
+ Construire un entraînement robuste + stockage des artefacts + prédiction par bureau.
165
+
166
+ 3.1 Cibles
167
+
168
+ Multi-sorties : target_share_<categorie> pour les 7 catégories.
169
+
170
+ 3.2 Features attendues (au minimum)
171
+
172
+ Pour une ligne (bureau, type, year=T) :
173
+
174
+ historiques laggés par catégorie (antérieurs à T)
175
+
176
+ prev_share_<cat>_any_lag1
177
+
178
+ prev_share_<cat>_<type>_lag1 (si existant)
179
+
180
+ écarts au national sur historiques :
181
+
182
+ prev_dev_to_national_<cat>_any_lag1 = prev_share_bureau - prev_share_national (sur l’élection antérieure utilisée)
183
+
184
+ ou par type si disponible
185
+
186
+ participation / abstention historiques si dispos :
187
+
188
+ prev_turnout_any_lag1, etc.
189
+
190
+ variables “swing” :
191
+
192
+ swing_<cat> = prev_share_lag1 - prev_share_lag2 (si lag2 existe)
193
+
194
+ Toutes ces features doivent être calculées sans fuite (join-asof temporel ou logique équivalente).
195
+
196
+ 3.3 Split & évaluation (obligatoire)
197
+
198
+ Interdiction de random split.
199
+
200
+ Implémenter une évaluation temporelle paramétrable, ex :
201
+
202
+ train <= 2017, valid 2019–2021, test >= 2022 (exemple : configurable)
203
+
204
+ Métriques :
205
+
206
+ MAE moyenne sur les 7 catégories
207
+
208
+ MAE par catégorie
209
+
210
+ option : erreur sur “catégorie gagnante”
211
+
212
+ Générer :
213
+
214
+ reports/metrics.json
215
+
216
+ reports/metrics.md
217
+
218
+ quelques figures (matplotlib) dans reports/figures/
219
+
220
+ 3.4 Modèles à entraîner
221
+
222
+ Implémenter au moins :
223
+
224
+ Ridge (baseline interprétable) avec standardisation
225
+
226
+ HistGradientBoostingRegressor (via MultiOutputRegressor si nécessaire)
227
+
228
+ LightGBM / XGBoost / CatBoost si installés (détection automatique, sinon skip proprement)
229
+
230
+ Sauvegarder modèles et preprocessors dans models/ (joblib), avec un model_card.md (date, données, split, features, métriques).
231
+
232
+ 3.5 Prédiction pour une élection cible
233
+
234
+ Créer src/model/predict.py :
235
+
236
+ arguments : --target-election-type, --target-year, --commune (par défaut Sète)
237
+
238
+ produit un CSV :
239
+
240
+ predictions/pred_<type>_<year>_sete.csv
241
+
242
+ colonnes : commune, bureau_code, predicted_share_<categorie> (7 colonnes), + comparateurs (voir ci-dessous)
243
+
244
+ Comparateurs à afficher dans Gradio
245
+
246
+ Pour chaque catégorie, calculer 2 deltas (points de %):
247
+
248
+ vs la dernière législative avant l’élection cible
249
+
250
+ trouver dans la DB l’élection election_type='legislatives' avec année max < target_year (et même round logique si géré)
251
+
252
+ récupérer le share_pct du bureau sur cette législative (par catégorie)
253
+
254
+ delta_leg = predicted_share - share_leg
255
+
256
+ vs les municipales 2020
257
+
258
+ si target_year != 2020 : récupérer election_type='municipales' et election_year=2020 pour ce bureau
259
+
260
+ delta_mun2020 = predicted_share - share_mun2020
261
+ Si une référence manque (bureau absent, données manquantes), afficher “N/A” au lieu du delta.
262
+
263
+ Étape 4 — Application Gradio
264
+
265
+ Créer une app Gradio production-ready dans app/gradio_app.py.
266
+
267
+ 4.1 UI
268
+
269
+ Titre : “Prévision Municipales — Ville de Sète”
270
+
271
+ Inputs :
272
+
273
+ Dropdown bureau : liste des bureaux disponibles pour Sète (requête DB)
274
+
275
+ Dropdown election : couples (type, année) cibles (par défaut municipale 2026, mais liste configurable). Si 2026 n’existe pas en DB, elle doit pouvoir être sélectionnée quand même comme “cible future”.
276
+
277
+ Bouton : “Prédire”
278
+
279
+ 4.2 Sorties
280
+
281
+ Afficher :
282
+
283
+ Un tableau (pandas dataframe ou composant gradio) avec 7 lignes (catégories) :
284
+
285
+ categorie
286
+
287
+ score_predit_%
288
+
289
+ Δ vs législatives (dernières) (en points)
290
+
291
+ Δ vs municipales 2020 (en points)
292
+
293
+ Option bonus : un bar chart matplotlib des scores prédits par catégorie (simple, lisible).
294
+
295
+ Format texte exigé (si rendu texte au lieu de tableau) :
296
+
297
+ centre : 21.3% (+1.2 vs législatives, -0.8 vs mun 2020)
298
+
299
+ et ainsi de suite
300
+ Avec N/A si delta indisponible.
301
+
302
+ 4.3 Backend
303
+
304
+ L’app ne doit pas recalculer tout le dataset à chaque clic.
305
+
306
+ Au démarrage :
307
+
308
+ se connecte à Postgres
309
+
310
+ charge le modèle entraîné + preprocessor
311
+
312
+ Lors d’une prédiction :
313
+
314
+ récupère les features “safe” du bureau pour la cible (type, année) :
315
+
316
+ soit via une table features pré-calculées,
317
+
318
+ soit en construisant “à la volée” depuis l’historique DB (mais de manière efficace et sans fuite)
319
+
320
+ applique modèle → prédictions → post-traitement (clip + renormalisation)
321
+
322
+ calcule deltas vs références (législatives max<target_year, municipales 2020)
323
+
324
+ renvoie la table + graph
325
+
326
+ Architecture attendue du repo
327
+
328
+ Créer / compléter l’arborescence :
329
+
330
+ src/
331
+
332
+ data/
333
+
334
+ features/
335
+
336
+ db/
337
+
338
+ model/
339
+
340
+ utils/
341
+
342
+ app/
343
+
344
+ gradio_app.py
345
+
346
+ data/raw/ (existant)
347
+
348
+ data/interim/ (existant)
349
+
350
+ data/processed/ (existant)
351
+
352
+ models/
353
+
354
+ predictions/
355
+
356
+ reports/
357
+
358
+ notebooks/ (existant)
359
+
360
+ Inclure :
361
+
362
+ README.md très clair avec commandes :
363
+
364
+ (a) preprocess/build_features
365
+
366
+ (b) lancer Postgres
367
+
368
+ (c) ingest DB
369
+
370
+ (d) train/evaluate
371
+
372
+ (e) lancer Gradio
373
+
374
+ requirements.txt ou pyproject.toml
375
+
376
+ logs (INFO) + messages d’erreur actionnables (ex : DB down, modèle absent, fichiers manquants)
377
+
378
+ code robuste si data/raw est vide : doit expliquer quels fichiers déposer et comment les nommer.
379
+
380
+ Points d’attention “réels”
381
+
382
+ gérer bureaux absents certaines années → imputation + deltas N/A
383
+
384
+ gérer harmonisation des libellés bureau → normalisation + warning
385
+
386
+ gérer tours (T1/T2) : inclure colonne round ou config, et éviter mélange non intentionnel
387
+
388
+ le mapping “candidat/nuance -> catégorie” est critique :
389
+
390
+ prévoir data/mappings/category_mapping.csv (ou YAML) et documenter la logique
391
+
392
+ tout non-mappé -> autres puis redistribuer/ignorer selon règle explicite (mais comme les catégories sont imposées, définir une stratégie : soit exclure “autres” du modèle, soit le répartir, soit le conserver et renormaliser sur 7 catégories — choisir une approche et la documenter)
393
+
394
+ Livrables finaux attendus
395
+
396
+ Code complet (modules + scripts CLI)
397
+
398
+ Schéma DB + docker-compose + script ingestion
399
+
400
+ Pipeline entraînement/évaluation + artefacts modèles
401
+
402
+ Application Gradio fonctionnelle
403
+
404
+ Exemples de fichiers mapping :
405
+
406
+ data/mappings/category_mapping.csv
407
+
408
+ Documentation complète dans README
409
+
410
+ Ne pas inventer de données. Travailler avec l’existant (data/interim, data/processed, notebooks), corriger si incohérent, et rendre l’ensemble production-ready (reproductible, configurable, sans fuite temporelle).
models/best_model.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "name": "hist_gradient_boosting"
3
+ }
models/feature_columns.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "prev_share_any_lag1_centre",
3
+ "prev_share_any_lag1_droite_dure",
4
+ "prev_share_any_lag1_droite_modere",
5
+ "prev_share_any_lag1_extreme_droite",
6
+ "prev_share_any_lag1_extreme_gauche",
7
+ "prev_share_any_lag1_gauche_dure",
8
+ "prev_share_any_lag1_gauche_modere",
9
+ "prev_share_type_lag1_centre",
10
+ "prev_share_type_lag1_droite_dure",
11
+ "prev_share_type_lag1_droite_modere",
12
+ "prev_share_type_lag1_extreme_droite",
13
+ "prev_share_type_lag1_extreme_gauche",
14
+ "prev_share_type_lag1_gauche_dure",
15
+ "prev_share_type_lag1_gauche_modere",
16
+ "prev_dev_to_national_any_lag1_centre",
17
+ "prev_dev_to_national_any_lag1_droite_dure",
18
+ "prev_dev_to_national_any_lag1_droite_modere",
19
+ "prev_dev_to_national_any_lag1_extreme_droite",
20
+ "prev_dev_to_national_any_lag1_extreme_gauche",
21
+ "prev_dev_to_national_any_lag1_gauche_dure",
22
+ "prev_dev_to_national_any_lag1_gauche_modere",
23
+ "prev_dev_to_national_type_lag1_centre",
24
+ "prev_dev_to_national_type_lag1_droite_dure",
25
+ "prev_dev_to_national_type_lag1_droite_modere",
26
+ "prev_dev_to_national_type_lag1_extreme_droite",
27
+ "prev_dev_to_national_type_lag1_extreme_gauche",
28
+ "prev_dev_to_national_type_lag1_gauche_dure",
29
+ "prev_dev_to_national_type_lag1_gauche_modere",
30
+ "swing_any_centre",
31
+ "swing_any_droite_dure",
32
+ "swing_any_droite_modere",
33
+ "swing_any_extreme_droite",
34
+ "swing_any_extreme_gauche",
35
+ "swing_any_gauche_dure",
36
+ "swing_any_gauche_modere",
37
+ "turnout_pct",
38
+ "prev_turnout_any_lag1",
39
+ "prev_turnout_same_type_lag1"
40
+ ]
models/hist_gradient_boosting.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91189f0a1fa5876b60b75e54293f093023d12f1f32ee5e3076aa648659bf7afd
3
+ size 2676501
models/model_card.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Model card
2
+ - Modèle: hist_gradient_boosting
3
+ - Split temporel: train<= 2019, valid<= 2021, test>= 2022
4
+ - Features: 38 colonnes numériques (lags, écarts national, swing, turnout)
5
+ - Cibles: parts par bloc (7 catégories) renormalisées.
6
+ - Métriques principales (MAE moyen, jeux valid/test):
7
+ - Valid: 0.1233
8
+ - Test: 0.1146
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas>=2.2.0
2
+ numpy>=1.26.0
3
+ sqlalchemy>=2.0.0
4
+ psycopg2-binary>=2.9.9
5
+ gradio>=4.0.0
6
+ pyarrow>=15.0.0
7
+ scikit-learn>=1.4.0
8
+ # Modèles gradient boosting / multi-output recommandés pour la prédiction bureau de vote
9
+ lightgbm>=4.3.0
10
+ xgboost>=2.0.0
11
+ catboost>=1.2.5
12
+ shap>=0.45.0
13
+ pyyaml>=6.0.0
14
+ matplotlib>=3.8.0
15
+ folium>=0.16.0
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Makes src a package so notebooks can import src.data_prep
src/constants.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
# Canonical blocs/categories to surface in the app outputs (7 cibles)
# NOTE(review): feature/target column names elsewhere are suffixed with these
# labels (e.g. prev_share_any_lag1_centre) — they must match exactly. The list
# order presumably drives display/column order; confirm before reordering.
CANDIDATE_CATEGORIES = [
    "centre",
    "gauche_modere",
    "droite_modere",
    "gauche_dure",
    "droite_dure",
    "extreme_gauche",
    "extreme_droite",
]

# Numeric columns used across the pipeline and DB ingestion
# (raw counts, derived shares/rates, lagged features, and a centred year).
NUMERIC_COLUMNS = [
    "voix_bloc",
    "exprimes",
    "inscrits",
    "votants",
    "blancs",
    "nuls",
    "part_bloc",
    "part_bloc_national",
    "taux_participation_national",
    "taux_participation_bv",
    "taux_blancs_bv",
    "taux_nuls_bv",
    "ecart_bloc_vs_national",
    "ecart_participation_vs_nat",
    "croissance_inscrits_depuis_base",
    # lagged (previous-election) variants — built without temporal leakage
    "part_bloc_lag1",
    "ecart_bloc_vs_national_lag1",
    "taux_participation_bv_lag1",
    "annee_centre",  # assumes year centred around a reference year — TODO confirm
]
src/data/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Data subpackage: preprocessing helpers and CLI entrypoints.
3
+ """
src/data/preprocess.py ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Any, Dict, Iterable, Mapping
8
+
9
+ import pandas as pd
10
+
11
+ from src import data_prep
12
+
13
+ LOGGER = logging.getLogger(__name__)
14
+
15
+
16
# Shared fragments for the per-file meta entries below. Most ministry exports
# share the same count columns; only the candidate/nuance labels and the
# bureau-de-vote identifier columns vary between vintages.
_COUNT_RENAMES: Dict[str, str] = {
    "Inscrits": "inscrits",
    "Abstentions": "abstentions",
    "Votants": "votants",
    "Blancs": "blancs",
    "Nuls": "nuls",
    "Exprimés": "exprimes",
    "Voix": "voix",
}

# Bureau-de-vote identifier columns per export vintage.
_BV_COLS_STD = ["Code de la commune", "Code du b.vote"]
_BV_COLS_2024 = ["Code commune", "Code BV"]

# Candidate / nuance label variants.
_NUANCE_NOM = {"Nuance": "code_candidature", "Nom": "nom_candidature"}
_PR_CANDIDAT = {"Nom": "nom_candidature", "Code nuance du candidat": "code_candidature"}
_MN_2020 = {"Nom": "nom_candidature", "Liste": "nom_candidature", "Code Nuance": "code_candidature"}
_DEP_BINOME = {"Nuance": "code_candidature", "Binôme": "nom_candidature"}
_REG_LISTE = {"Nuance Liste": "code_candidature", "Libellé Abrégé Liste": "nom_candidature"}


def _meta(
    type_scrutin: str,
    date_scrutin: str,
    tour: int,
    code_bv_cols: list[str],
    extra_renames: Mapping[str, str],
    **overrides: Any,
) -> Dict[str, Any]:
    """Build one scrutin meta entry from the shared fragments.

    ``extra_renames`` is merged over the common count-column renames;
    ``overrides`` adds file-specific keys (e.g. ``sep``).
    """
    entry: Dict[str, Any] = {
        "type_scrutin": type_scrutin,
        "date_scrutin": date_scrutin,
        "tour": tour,
        "code_bv_cols": list(code_bv_cols),  # fresh copy per entry
        "rename_map": {**_COUNT_RENAMES, **extra_renames},
    }
    entry.update(overrides)
    return entry


# Embedded fallback configuration (used when config/raw_sources.yaml is absent).
# The 2014 exports keep their bespoke layouts: no blancs/nuls columns, candidate
# name split over two columns, and the tour carried in a column instead of being
# fixed per file. (A duplicated "Exprimés" key in the 14_EU map was removed.)
DEFAULT_META_CONFIG: Dict[str, Dict[str, Any]] = {
    "14_EU.csv": {
        "type_scrutin": "europeennes",
        "date_scrutin": "2014-05-25",
        "tour_column": "N° tour",
        "code_bv_cols": ["Code de la commune", "N° de bureau de vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Votants": "votants",
            "Exprimés": "exprimes",
            "Nombre de voix du candidat": "voix",
            "Voix": "voix",
            "Nom du candidat": "nom_candidature",
            "Prénom du candidat": "nom_candidature",
            "Code nuance du candidat": "code_candidature",
        },
    },
    "14_MN14_T1T2.csv": {
        "type_scrutin": "municipales",
        "date_scrutin": "2014-03-23",
        "tour_column": "N° tour",
        "code_bv_cols": ["Code commune", "N° de bureau de vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Votants": "votants",
            "Exprimés": "exprimes",
            "Nombre de voix": "voix",
            "Nom du candidat tête de liste": "nom_candidature",
            "Prénom du candidat tête de liste": "nom_candidature",
            "Code nuance de la liste": "code_candidature",
        },
    },
    "17_L_T1.csv": _meta("legislatives", "2017-06-11", 1, _BV_COLS_STD, _NUANCE_NOM),
    "17_L_T2.csv": _meta("legislatives", "2017-06-18", 2, _BV_COLS_STD, _NUANCE_NOM),
    "17_PR_T1.csv": _meta("presidentielles", "2017-04-23", 1, _BV_COLS_STD, _PR_CANDIDAT),
    "17_PR_T2.csv": _meta("presidentielles", "2017-05-07", 2, _BV_COLS_STD, _PR_CANDIDAT),
    "19_EU.csv": _meta(
        "europeennes", "2019-05-26", 1, _BV_COLS_STD,
        {"Nom Tête de Liste": "nom_candidature", "Nuance Liste": "code_candidature"},
    ),
    "20_MN_T1.csv": _meta(
        "municipales", "2020-03-15", 1, ["Code de la commune", "Code B.Vote"], _MN_2020,
        sep=";",
    ),
    "20_MN_T2.csv": _meta("municipales", "2020-06-28", 2, ["Code de la commune", "Code B.Vote"], _MN_2020),
    "21_DEP_T1.csv": _meta("departementales", "2021-06-20", 1, _BV_COLS_STD, _DEP_BINOME),
    "21_DEP_T2.csv": _meta("departementales", "2021-06-27", 2, _BV_COLS_STD, _DEP_BINOME),
    "21_REG_T1.csv": _meta("regionales", "2021-06-20", 1, _BV_COLS_STD, _REG_LISTE),
    "21_REG_T2.csv": _meta("regionales", "2021-06-27", 2, _BV_COLS_STD, _REG_LISTE),
    "22_L_T1.csv": _meta("legislatives", "2022-06-12", 1, _BV_COLS_STD, _NUANCE_NOM),
    "22_L_T2.csv": _meta("legislatives", "2022-06-19", 2, _BV_COLS_STD, _NUANCE_NOM),
    "22_PR_T1.csv": _meta("presidentielles", "2022-04-10", 1, _BV_COLS_STD, _PR_CANDIDAT),
    "22_PR_T2.csv": _meta("presidentielles", "2022-04-24", 2, _BV_COLS_STD, _PR_CANDIDAT),
    "24_EU.csv": _meta(
        "europeennes", "2024-06-09", 1, _BV_COLS_2024,
        {
            "Voix 1": "voix",
            "Nuance liste 1": "code_candidature",
            "Libellé abrégé de liste 1": "nom_candidature",
        },
    ),
    "24_L_T1.csv": _meta(
        "legislatives", "2024-06-30", 1, _BV_COLS_2024,
        {**_REG_LISTE, "Binôme": "nom_candidature"},
    ),
    "24_L_T2.csv": _meta(
        "legislatives", "2024-07-07", 2, _BV_COLS_2024,
        {**_REG_LISTE, "Binôme": "nom_candidature"},
    ),
}
362
+
363
+ DEFAULT_META_CONFIG_PATH = Path(__file__).resolve().parents[2] / "config" / "raw_sources.yaml"
364
+
365
+
366
def _resolve_meta_config(raw: Mapping[str, Mapping[str, Any]]) -> Dict[str, Dict[str, Any]]:
    """Resolve ``copy_from`` inheritance between meta-config entries.

    An entry may point at a base entry via ``copy_from``; the base is resolved
    first and the entry's own keys override it, except ``rename_map`` which is
    merged key-wise (override wins). A ``copy_from`` cycle raises ValueError,
    an unknown target raises KeyError.
    """
    done: Dict[str, Dict[str, Any]] = {}

    def _resolve(name: str, chain: list[str]) -> Dict[str, Any]:
        if name in done:
            return done[name]
        if name in chain:
            raise ValueError(f"Cycle detecte dans meta-config: {' -> '.join(chain + [name])}")
        entry = dict(raw[name])
        parent = entry.pop("copy_from", None)
        if not parent:
            done[name] = entry
            return entry
        if parent not in raw:
            raise KeyError(f"copy_from cible introuvable: {parent}")
        base = _resolve(parent, chain + [name])
        combined = dict(base)
        combined.update(entry)
        # rename_map is merged key-by-key rather than replaced wholesale
        inherited = dict(base.get("rename_map", {}))
        own = dict(entry.get("rename_map", {}))
        if inherited or own:
            combined["rename_map"] = {**inherited, **own}
        done[name] = combined
        return combined

    for name in raw:
        _resolve(name, [])
    return done
394
+
395
+
396
def load_meta_config(meta_path: Path | None) -> Dict[str, Dict[str, Any]]:
    """Load the scrutin meta-config, falling back to the embedded defaults.

    With ``meta_path=None``, the repo-level config file is used when it exists,
    otherwise the built-in DEFAULT_META_CONFIG. ``.yml``/``.yaml`` files need
    PyYAML; any other suffix is parsed as JSON. ``copy_from`` inheritance is
    resolved before returning.
    """
    if meta_path is None:
        if not DEFAULT_META_CONFIG_PATH.exists():
            return DEFAULT_META_CONFIG
        meta_path = DEFAULT_META_CONFIG_PATH
    if not meta_path.exists():
        raise FileNotFoundError(f"Meta-config file not found: {meta_path}")
    if meta_path.suffix in (".yml", ".yaml"):
        try:
            import yaml
        except Exception as exc:
            raise RuntimeError("PyYAML is required to read YAML meta-config files.") from exc
        payload = yaml.safe_load(meta_path.read_text()) or {}
    else:
        payload = json.loads(meta_path.read_text())
    if not isinstance(payload, dict):
        raise ValueError("Meta-config invalide: attendu un mapping de fichiers vers meta-donnees.")
    return _resolve_meta_config(payload)
415
+
416
+
417
def preprocess_all(raw_dir: Path, output_dir: Path, meta_config: Mapping[str, Mapping[str, Any]]) -> pd.DataFrame:
    """Standardise every configured raw CSV and persist the harmonised long format.

    Raw files missing from ``raw_dir`` are skipped with a warning; loading
    nothing at all is an error. Writes ``elections_long.parquet`` and
    ``elections_long.csv`` under ``output_dir`` and returns the concatenated
    dataframe.
    """
    standardized: list[pd.DataFrame] = []
    skipped: list[str] = []
    for file_name, meta in meta_config.items():
        source = raw_dir / file_name
        if not source.exists():
            skipped.append(file_name)
            continue
        LOGGER.info("Standardisation de %s", file_name)
        standardized.append(
            data_prep.standardize_election(
                source,
                meta,
                rename_map=meta.get("rename_map", {}),
                sep=meta.get("sep", ";"),
                encoding=meta.get("encoding", ("cp1252", "utf-8-sig", "latin-1")),
                decimal=meta.get("decimal", ","),
            )  # type: ignore[arg-type]
        )
    if skipped:
        LOGGER.warning("Fichiers manquants ignorés: %s", ", ".join(sorted(skipped)))
    if not standardized:
        raise RuntimeError("Aucune donnée chargée : vérifier le dossier raw et la configuration meta.")

    elections_long = pd.concat(standardized, ignore_index=True)
    elections_long["date_scrutin"] = pd.to_datetime(elections_long["date_scrutin"])
    elections_long["annee"] = elections_long["date_scrutin"].dt.year
    elections_long["type_scrutin"] = elections_long["type_scrutin"].str.lower()
    # the commune code is the prefix of the "commune-bureau" identifier
    elections_long["code_commune"] = elections_long["code_bv"].astype(str).str.split("-").str[0]

    # surface (but do not fail on) consistency issues detected downstream
    for issue_name, rows in data_prep.validate_consistency(elections_long).items():
        if len(rows) > 0:
            LOGGER.warning("%s : %s lignes a inspecter", issue_name, len(rows))

    output_dir.mkdir(parents=True, exist_ok=True)
    parquet_path = output_dir / "elections_long.parquet"
    csv_path = output_dir / "elections_long.csv"
    elections_long.to_parquet(parquet_path, index=False)
    elections_long.to_csv(csv_path, sep=";", index=False)
    LOGGER.info("Long format sauvegarde (%s lignes) -> %s / %s", len(elections_long), parquet_path, csv_path)
    return elections_long
458
+
459
+
460
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse CLI arguments for the preprocessing entry point.

    Args:
        argv: explicit argument list (useful for tests); ``None`` reads
            ``sys.argv[1:]`` as before, so existing callers are unaffected.
    """
    parser = argparse.ArgumentParser(description="Prétraitement des fichiers bruts en format long standardisé.")
    parser.add_argument("--raw-dir", type=Path, default=Path("data/raw"), help="Répertoire des fichiers bruts CSV.")
    parser.add_argument("--output-dir", type=Path, default=Path("data/interim"), help="Destination du format long harmonisé.")
    parser.add_argument(
        "--meta-config",
        type=Path,
        default=None,
        help="Chemin vers un fichier JSON/YAML décrivant les meta-données des scrutins. Par défaut, utilise la configuration embarquée.",
    )
    return parser.parse_args(argv)
471
+
472
+
473
def main() -> None:
    """CLI entry point: configure logging, then run the full preprocessing."""
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    cli_args = parse_args()
    preprocess_all(cli_args.raw_dir, cli_args.output_dir, load_meta_config(cli_args.meta_config))


if __name__ == "__main__":
    main()
src/data_prep.py ADDED
@@ -0,0 +1,418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import re
5
+ from typing import Dict, Iterable, List, Mapping, Optional
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+
10
# Columns kept across all scrutins
# (harmonised long format: one row per bureau x scrutin x candidature)
STANDARD_COLUMNS: List[str] = [
    "code_bv",  # bureau identifier; built as "<commune>-<bureau>" — see build_code_bv
    "nom_bv",
    "annee",
    "date_scrutin",
    "type_scrutin",
    "tour",
    "inscrits",
    "votants",
    "abstentions",
    "blancs",
    "nuls",
    "exprimes",
    "code_candidature",  # nuance / party code
    "nom_candidature",
    "voix",
]

# Count columns that must end up numeric after loading/standardisation.
NUMERIC_COLUMNS = [
    "inscrits",
    "votants",
    "abstentions",
    "blancs",
    "nuls",
    "exprimes",
    "voix",
]
38
+
39
+
40
# Fallback repairs applied by _normalize_label: each key is a common mojibake
# sequence (UTF-8 bytes mis-decoded as cp1252/latin-1), mapped back to the
# intended character.
_MOJIBAKE_REPLACEMENTS = {
    "é": "é",
    "è": "è",
    "ê": "ê",
    "ë": "ë",
    "Ã ": "à",
    "â": "â",
    "ç": "ç",
    "ù": "ù",
    "û": "û",
    "ï": "ï",
    "ô": "ô",
    "ö": "ö",
    "É": "É",
    "È": "È",
    "Ê": "Ê",
    "Ë": "Ë",
    "À": "À",
    "Â": "Â",
    "Ç": "Ç",
    # NOTE(review): the next two keys both render as U+FFFD here and look like
    # duplicates (the second would silently overwrite the first, same value).
    # Verify the original bytes; if truly identical, drop one entry.
    "�": "°",
    "�": "°",
}
63
+
64
+
65
def _normalize_label(label: str) -> str:
    """
    Attempt to repair mojibake in column labels (UTF-8 read as latin-1 or vice versa).

    The primary repair re-encodes the label as latin-1 and decodes it as UTF-8,
    which fixes any single mis-decoding regardless of the character involved.
    The _MOJIBAKE_REPLACEMENTS table remains as a fallback for labels where the
    round-trip is not applicable. BOMs are stripped and whitespace collapsed.

    Fix: the previous implementation then re-encoded the repaired string as
    UTF-8 and decoded it as latin-1, which re-introduced the mojibake it had
    just fixed — so only characters present in the replacement table ever came
    out right. That undo step is removed.
    """
    fixed = label
    try:
        fixed = label.encode("latin1").decode("utf-8")
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Not a latin-1-representable mojibake string; keep as-is and rely on
        # the replacement table below.
        fixed = label
    else:
        # Leftover from double-encoding (e.g. "Â°" for "°"): drop the stray Â.
        if "Â" in fixed:
            fixed = fixed.replace("Â", "")
    for bad, good in _MOJIBAKE_REPLACEMENTS.items():
        if bad in fixed:
            fixed = fixed.replace(bad, good)
    fixed = fixed.replace("\ufeff", "")  # remove BOM
    fixed = " ".join(fixed.split())  # normalise whitespace
    return fixed
88
+
89
+
90
def _canonical_label(label: str) -> str:
    """
    Lowercase alpha-numeric only version of a label for fuzzy matching.

    The label is first run through the mojibake repair so accented/broken
    variants of the same header collapse to a single canonical key.
    """
    # `re` is imported at module level; the previous function-local
    # `import re` was redundant and has been removed.
    normalized = _normalize_label(label).lower()
    return re.sub(r"[^0-9a-z]", "", normalized)
98
+
99
+
100
def _unpivot_wide_candidates(df: pd.DataFrame) -> pd.DataFrame:
    """
    Detect wide candidate columns (e.g., 'Voix 1', 'Nuance liste 2') and unpivot to long.

    Every column whose name ends in a number is treated as candidate-indexed;
    indices that carry both a votes column and a nuance column become one frame
    each (non-candidate columns repeated), concatenated into long format with
    standard columns 'voix' and 'code_candidature'. With at most one complete
    candidate index the input is returned unchanged.

    Fix: removed the unused local `candidate_cols` (computed but never read).
    """
    # trailing integer (optionally separated by whitespace or '_') marks a wide column
    pattern = re.compile(r"^(?P<base>.*?)(?:\s+|_)?(?P<idx>\d+)$")
    candidate_map: Dict[str, Dict[str, str]] = {}
    wide_cols: set[str] = set()
    for col in df.columns:
        match = pattern.match(col)
        if not match:
            continue
        # NOTE: every numbered column is excluded from the repeated base
        # columns, even when its base label is not recognised below.
        wide_cols.add(col)
        base = match.group("base").strip()
        idx = match.group("idx")
        canon = _canonical_label(base)
        field = None
        if canon == "voix":
            field = "voix"
        elif canon in {"nuance", "nuanceliste", "codenuance", "codenuanceducandidat", "codenuanceliste"}:
            field = "code_candidature"
        if field:
            candidate_map.setdefault(idx, {})[field] = col

    # only indices carrying BOTH votes and nuance are usable candidates
    indices = [
        idx for idx, fields in candidate_map.items()
        if {"voix", "code_candidature"}.issubset(fields.keys())
    ]
    if len(indices) <= 1:
        # nothing to unpivot (single-candidate "wide" files keep their layout)
        return df

    base_cols = [c for c in df.columns if c not in wide_cols]
    frames = []
    for idx in sorted(indices, key=lambda v: int(v)):
        fields = candidate_map[idx]
        use_cols = base_cols + list(fields.values())
        sub = df[use_cols].copy()
        sub = sub.rename(
            columns={
                fields["voix"]: "voix",
                fields["code_candidature"]: "code_candidature",
            }
        )
        frames.append(sub)
    return pd.concat(frames, ignore_index=True)
146
+
147
+
148
def deduplicate_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Collapse columns sharing the same name into a single column.

    For each duplicated label, the first occurrence (in column order) is kept
    and filled with the first non-null value found across the duplicates; the
    remaining occurrences are dropped. The input dataframe is not modified.

    Fix: the previous implementation removed duplicates with
    ``df.drop(columns=cols[1:])`` — but a label-based drop removes *every*
    column carrying that label, so the merged column itself was deleted.
    Duplicates are now handled positionally.
    """
    df = df.copy()
    labels = list(df.columns)
    for col in df.columns[df.columns.duplicated()].unique():
        positions = [i for i, c in enumerate(labels) if c == col]
        merged = df.iloc[:, positions[0]]
        for pos in positions[1:]:
            merged = merged.fillna(df.iloc[:, pos])
        df.iloc[:, positions[0]] = merged
    # keep only the first occurrence of each label
    return df.loc[:, ~df.columns.duplicated()]
165
+
166
+
167
def load_raw(
    path: Path,
    *,
    sep: str = ";",
    encoding: str | Iterable[str] = "cp1252",
    decimal: str = ",",
    dtype: Optional[Mapping[str, str]] = None,
    engine: str = "c",
) -> pd.DataFrame:
    """
    Wrapper around read_csv with encoding fallbacks to mitigate mojibake.

    Tries encodings in order (default: cp1252, utf-8-sig, latin-1) until column
    names no longer contain replacement artefacts (� or Ã), then normalises labels.

    ``encoding`` may be a single name or an ordered iterable of candidates;
    "utf-8-sig" and "latin-1" are always appended as last resorts. On a
    ParserError the read is retried with the python engine, skipping bad lines.
    Raises the last UnicodeDecodeError if every candidate fails to decode.
    """
    # Build the ordered candidate list, de-duplicating the appended fallbacks.
    encoding_choices: List[str] = []
    if isinstance(encoding, str):
        encoding_choices.append(encoding)
    else:
        encoding_choices.extend(list(encoding))
    encoding_choices.extend([e for e in ["utf-8-sig", "latin-1"] if e not in encoding_choices])

    last_exc: Optional[Exception] = None
    for enc in encoding_choices:
        try:
            try:
                df = pd.read_csv(
                    path,
                    sep=sep,
                    encoding=enc,
                    decimal=decimal,
                    dtype=dtype,  # type: ignore
                    engine=engine,  # type: ignore
                    low_memory=False,
                )
            except pd.errors.ParserError:
                # Retry with python engine and skip malformed lines (low_memory not supported)
                df = pd.read_csv(
                    path,
                    sep=sep,
                    encoding=enc,
                    decimal=decimal,
                    dtype=dtype,  # type: ignore
                    engine="python",
                    on_bad_lines="skip",
                )
        except UnicodeDecodeError as exc:
            # Remember the failure and move on to the next encoding candidate.
            last_exc = exc
            continue

        # Decoding succeeded but headers may still be mojibake; prefer a later
        # candidate unless this was the last one (then accept and repair below).
        bad_cols = any(("�" in col) or ("Ã" in col) for col in df.columns)
        if bad_cols and enc != encoding_choices[-1]:
            # try next encoding candidate
            continue

        df.columns = [_normalize_label(c) for c in df.columns]
        return df

    if last_exc:
        raise last_exc
    # Defensive: only reachable if the candidate list were empty.
    raise UnicodeDecodeError("utf-8", b"", 0, 1, "unable to decode with provided encodings")
228
+
229
+
230
def ensure_columns(df: pd.DataFrame, required: Iterable[str]) -> pd.DataFrame:
    """Guarantee every *required* column exists, filling absent ones with NaN.

    The frame is modified in place and also returned for call chaining.
    """
    missing = [name for name in required if name not in df.columns]
    for name in missing:
        df[name] = np.nan
    return df
238
+
239
+
240
def add_election_metadata(df: pd.DataFrame, meta: Mapping[str, object]) -> pd.DataFrame:
    """Stamp scrutin-level metadata onto every row of *df*.

    ``meta`` must provide ``type_scrutin``, ``tour`` and ``date_scrutin``;
    ``annee`` is optional and falls back to the year of ``date_scrutin``.
    """
    df["type_scrutin"] = meta["type_scrutin"]
    df["tour"] = int(meta["tour"])  # type: ignore
    df["date_scrutin"] = pd.to_datetime(meta["date_scrutin"])  # type: ignore
    derived_year = df["date_scrutin"].dt.year
    df["annee"] = meta.get("annee", derived_year)  # type: ignore
    return df
257
+
258
+
259
def build_code_bv(df: pd.DataFrame, meta: Mapping[str, object]) -> pd.DataFrame:
    """Make sure the frame carries a ``code_bv`` identifier column.

    An existing ``code_bv`` column is only stripped of surrounding whitespace.
    Otherwise ``meta["code_bv_cols"]`` must list the columns whose values are
    joined with ``-`` (purely numeric parts zero-padded to 3 digits).
    """
    if "code_bv" in df.columns:
        df["code_bv"] = df["code_bv"].astype(str).str.strip()
        return df

    source_cols: Optional[List[str]] = meta.get("code_bv_cols")  # type: ignore[arg-type]
    if not source_cols:
        raise KeyError("code_bv not found in dataframe and no code_bv_cols provided in meta.")

    # Resolve requested names against the frame using the canonical
    # (accent/space-insensitive) form of each label.
    canon_to_actual = {_canonical_label(name): name for name in df.columns}
    resolved: List[str] = []
    for target in source_cols:
        key = _canonical_label(target)
        if key not in canon_to_actual:
            raise KeyError(f"{target!r} not found in columns. Available: {list(df.columns)}")
        resolved.append(canon_to_actual[key])

    def _fmt(value: str) -> str:
        return value.zfill(3) if value.isdigit() else value

    df["code_bv"] = (
        df[resolved]
        .astype(str)
        .apply(lambda row: "-".join(_fmt(v) for v in row), axis=1)
    )
    return df
288
+
289
+
290
def coerce_numeric(df: pd.DataFrame, numeric_cols: Iterable[str] = NUMERIC_COLUMNS) -> pd.DataFrame:
    """Cast every listed column that is present to numeric; bad values become NaN."""
    for name in (c for c in numeric_cols if c in df.columns):
        df[name] = pd.to_numeric(df[name], errors="coerce")
    return df
295
+
296
+
297
def basic_cleaning(df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply harmonisations common to all scrutins.

    - fill missing ``voix`` with 0 (creating the column when absent),
    - recompute ``exprimes`` as votants - blancs - nuls when it is missing
      but all three components are known,
    - drop rows lacking the minimal ``code_bv`` identifier.

    Expects ``exprimes``, ``votants``, ``blancs``, ``nuls`` and ``code_bv``
    to exist (guaranteed upstream by ``ensure_columns``).
    """
    df = df.copy()
    # BUG FIX: ``df.get("voix", 0)`` returns the *scalar* 0 when the column is
    # missing, and ``0.fillna(0)`` then raises AttributeError. Guard explicitly.
    if "voix" in df.columns:
        df["voix"] = df["voix"].fillna(0)
    else:
        df["voix"] = 0

    # Recompute exprimes when possible
    mask_expr = (
        df["exprimes"].isna()
        & df["votants"].notna()
        & df["blancs"].notna()
        & df["nuls"].notna()
    )
    df.loc[mask_expr, "exprimes"] = (
        df.loc[mask_expr, "votants"] - df.loc[mask_expr, "blancs"] - df.loc[mask_expr, "nuls"]
    )

    # Remove rows without minimal identifiers
    df = df[df["code_bv"].notna()]
    return df
318
+
319
+
320
def standardize_election(
    path: Path,
    meta: Mapping[str, object],
    *,
    rename_map: Optional[Mapping[str, str]] = None,
    sep: str = ";",
    encoding: str | Iterable[str] = ("cp1252", "utf-8-sig", "latin-1"),
    decimal: str = ",",
    dtype: Optional[Mapping[str, str]] = None,
) -> pd.DataFrame:
    """
    Load and standardise a single raw table to the long format expected downstream.

    Parameters
    ----------
    path : Path
        CSV path to the raw election table.
    meta : Mapping
        Must contain type_scrutin, tour, date_scrutin. Optionally code_bv_cols and annee.
        May also contain ``tour_column`` (plus optional ``tours``) to split a
        file that mixes several rounds; in that case ``tour`` must be absent.
    rename_map : Mapping
        Columns to rename from the raw schema to the standard schema.
    """
    df_raw = load_raw(path, sep=sep, encoding=encoding, decimal=decimal, dtype=dtype)
    # Normalise the rename keys the same way load_raw normalised the headers.
    rename_norm = {_normalize_label(k): v for k, v in (rename_map or {}).items()}

    def _process(df: pd.DataFrame, meta_for_tour: Mapping[str, object]) -> pd.DataFrame:
        # Standardises one (sub-)table: unpivot candidates, rename, dedupe
        # columns, build code_bv, stamp metadata, coerce types, clean rows.
        df_local = df.copy()
        df_local.columns = [_normalize_label(c) for c in df_local.columns]
        df_local = _unpivot_wide_candidates(df_local)
        if rename_norm:
            # Rename based on a canonical form (no accents/spaces), ignoring
            # possible numeric suffixes on the raw column names.
            import re

            def canonical_base(label: str) -> str:
                base = _canonical_label(label)
                # NOTE(review): r"\\d+$" matches a literal backslash followed
                # by digits; stripping a trailing numeric *suffix* would be
                # r"\d+$" — confirm which is intended.
                return re.sub(r"\\d+$", "", base)

            rename_by_base = {canonical_base(k): v for k, v in rename_norm.items()}
            rename_using = {}
            for col in df_local.columns:
                base = canonical_base(col)
                if base in rename_by_base:
                    rename_using[col] = rename_by_base[base]
            df_local = df_local.rename(columns=rename_using)
        df_local = deduplicate_columns(df_local)
        df_local = df_local.loc[:, ~df_local.columns.duplicated()]

        df_local = build_code_bv(df_local, meta_for_tour)
        df_local = add_election_metadata(df_local, meta_for_tour)
        df_local = ensure_columns(df_local, STANDARD_COLUMNS)
        df_local = coerce_numeric(df_local)
        df_local = basic_cleaning(df_local)
        # Standard columns first, any extra raw columns preserved after them.
        ordered_cols = STANDARD_COLUMNS + [col for col in df_local.columns if col not in STANDARD_COLUMNS]
        return df_local[ordered_cols]

    # Multi-tour handling: split on tour_column if provided and "tour" not explicit
    if meta.get("tour_column") and "tour" not in meta:
        tour_col = _normalize_label(str(meta["tour_column"]))
        if tour_col not in df_raw.columns:
            # Fallback: treat the file as a single round (tour=1) when the
            # declared tour column cannot be found.
            meta_single = {k: v for k, v in meta.items() if k != "tour_column"}
            meta_single["tour"] = int(meta.get("tour", 1))
            return _process(df_raw, meta_single)
        tours = meta.get("tours") or sorted(df_raw[tour_col].dropna().unique())
        frames: list[pd.DataFrame] = []
        for tour_val in tours:
            meta_tour = {k: v for k, v in meta.items() if k != "tour_column"}
            meta_tour["tour"] = int(tour_val)
            frames.append(_process(df_raw[df_raw[tour_col] == tour_val], meta_tour))
        if not frames:
            raise RuntimeError(f"Aucun tour détecté pour {path.name}")
        return pd.concat(frames, ignore_index=True)

    return _process(df_raw, meta)
394
+
395
+
396
def validate_consistency(df: pd.DataFrame, *, tolerance: float = 0.02) -> Dict[str, pd.DataFrame]:
    """
    Quick validation checks; returns offending rows keyed by check name.

    Checks (each only when its columns are present): votants > inscrits,
    exprimes + blancs + nuls vs votants, and sum of voix vs exprimes per
    (code_bv, type_scrutin, tour), both relative to *tolerance*.
    """
    issues: Dict[str, pd.DataFrame] = {}
    available = set(df.columns)

    if {"votants", "inscrits"} <= available:
        issues["votants_gt_inscrits"] = df[df["votants"] > df["inscrits"]]

    if {"exprimes", "blancs", "nuls", "votants"} <= available:
        balance = df.copy()
        balance["gap"] = (
            (balance["exprimes"] + balance["blancs"] + balance["nuls"] - balance["votants"])
            / balance["votants"].replace(0, np.nan)
        )
        issues["exprimes_balance_off"] = balance[balance["gap"].abs() > tolerance]

    if {"code_bv", "type_scrutin", "tour", "exprimes", "voix"} <= available:
        sums = df.groupby(["code_bv", "type_scrutin", "tour"], as_index=False)[["exprimes", "voix"]].sum()
        sums["gap"] = (sums["voix"] - sums["exprimes"]) / sums["exprimes"].replace(0, np.nan)
        issues["sum_voix_vs_exprimes"] = sums[sums["gap"].abs() > tolerance]

    return issues
src/database.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Iterable, Optional
6
+
7
+ import pandas as pd
8
+ import sqlalchemy as sa
9
+ from sqlalchemy import Column, Date, Float, Integer, MetaData, String, Table
10
+ from sqlalchemy.engine import Engine
11
+
12
+ from .constants import NUMERIC_COLUMNS
13
+ from .pipeline import normalize_bloc
14
+
15
+
16
def get_engine(url: Optional[str] = None) -> Engine:
    """Build a SQLAlchemy engine from *url*, falling back to the DATABASE_URL env var."""
    resolved = url or os.getenv("DATABASE_URL")
    if not resolved:
        raise RuntimeError("DATABASE_URL is not set. Example: postgresql+psycopg2://user:pass@localhost:5432/elections")
    return sa.create_engine(resolved)
21
+
22
+
23
def define_schema(metadata: MetaData) -> Table:
    """Declare the flat ``election_results`` table on *metadata* and return it.

    One row = one (bureau de vote, scrutin, bloc) combination; raw counts,
    derived shares/rates and lagged features are stored denormalised.
    """
    return Table(
        "election_results",
        metadata,
        Column("id", Integer, primary_key=True, autoincrement=True),
        # Bureau de vote identification
        Column("code_bv", String(32), index=True, nullable=False),
        Column("nom_bv", String(255)),
        # Scrutin identification
        Column("date_scrutin", Date, index=True, nullable=False),
        Column("annee", Integer, index=True, nullable=False),
        Column("type_scrutin", String(32), index=True, nullable=False),
        Column("tour", Integer, nullable=False),
        # Political bloc and raw counts
        Column("bloc", String(64), index=True, nullable=False),
        Column("voix_bloc", Float),
        Column("exprimes", Float),
        Column("inscrits", Float),
        Column("votants", Float),
        Column("blancs", Float),
        Column("nuls", Float),
        # Derived shares and participation rates
        Column("part_bloc", Float),
        Column("part_bloc_national", Float),
        Column("taux_participation_national", Float),
        Column("taux_participation_bv", Float),
        Column("taux_blancs_bv", Float),
        Column("taux_nuls_bv", Float),
        # Gaps vs the national reference, and registration growth
        Column("ecart_bloc_vs_national", Float),
        Column("ecart_participation_vs_nat", Float),
        Column("croissance_inscrits_depuis_base", Float),
        # Lagged features (previous comparable scrutin)
        Column("part_bloc_lag1", Float),
        Column("ecart_bloc_vs_national_lag1", Float),
        Column("taux_participation_bv_lag1", Float),
        Column("annee_centre", Float),
    )
55
+
56
+
57
def create_schema(engine: Engine) -> None:
    """Create the election_results table (and its indexes) if not yet present."""
    registry = MetaData()
    define_schema(registry)
    registry.create_all(engine)
61
+
62
+
63
+ def _coerce_numeric(df: pd.DataFrame, numeric_cols: Iterable[str]) -> pd.DataFrame:
64
+ for col in numeric_cols:
65
+ if col in df.columns:
66
+ df[col] = pd.to_numeric(df[col], errors="coerce")
67
+ return df
68
+
69
+
70
def load_processed_to_db(
    processed_path: Path = Path("data/processed/elections_blocs.csv"),
    *,
    engine: Optional[Engine] = None,
    if_exists: str = "replace",
    chunksize: int = 1000,
) -> int:
    """
    Load the processed bloc-level dataset into PostgreSQL.

    Parameters
    ----------
    processed_path : Path
        ``;``-separated CSV produced by the processing pipeline.
    engine : Engine, optional
        SQLAlchemy engine; defaults to one built from DATABASE_URL.
    if_exists : str
        Forwarded to ``DataFrame.to_sql`` ("replace" by default).
    chunksize : int
        Rows per multi-row INSERT batch.

    Returns the number of rows written.
    """
    engine = engine or get_engine()
    create_schema(engine)

    df = pd.read_csv(processed_path, sep=";")
    # Store plain dates (not timestamps) to match the Date column.
    df["date_scrutin"] = pd.to_datetime(df["date_scrutin"]).dt.date
    if "bloc" in df.columns:
        df["bloc"] = df["bloc"].apply(normalize_bloc)
    df = _coerce_numeric(df, NUMERIC_COLUMNS)

    # NOTE(review): with if_exists="replace", to_sql drops and recreates the
    # table from inferred dtypes, discarding the schema created just above
    # (indexes, NOT NULL constraints, id column) — confirm this is intended.
    df.to_sql(
        "election_results",
        engine,
        if_exists=if_exists,
        index=False,
        method="multi",
        chunksize=chunksize,
    )
    return len(df)
100
+
101
+
102
def list_bureaux(engine: Engine) -> list[str]:
    """Return every distinct bureau code present in election_results, sorted."""
    query = sa.text("select distinct code_bv from election_results order by code_bv")
    with engine.connect() as conn:
        rows = conn.execute(query).fetchall()
    return [row[0] for row in rows]
106
+
107
+
108
def fetch_history(engine: Engine, code_bv: str) -> pd.DataFrame:
    """Fetch the full result history of one bureau, ordered by date then bloc."""
    sql = sa.text(
        """
        select *
        from election_results
        where code_bv = :code_bv
        order by date_scrutin asc, bloc asc
        """
    )
    return pd.read_sql(sql, engine, params={"code_bv": code_bv})
118
+
119
+
120
# Public API of this module.
__all__ = [
    "create_schema",
    "define_schema",
    "fetch_history",
    "get_engine",
    "list_bureaux",
    "load_processed_to_db",
]


if __name__ == "__main__":
    # CLI entry point: always creates the schema; --load additionally ingests
    # the processed CSV (replacing the existing table).
    import argparse

    parser = argparse.ArgumentParser(description="Initialise la base et charge les résultats.")
    parser.add_argument(
        "--load",
        action="store_true",
        help="Charger data/processed/elections_blocs.csv dans la base (remplace la table).",
    )
    parser.add_argument(
        "--path",
        type=Path,
        default=Path("data/processed/elections_blocs.csv"),
        help="Chemin vers le fichier processe (CSV ; par defaut data/processed/elections_blocs.csv).",
    )
    args = parser.parse_args()

    engine = get_engine()
    create_schema(engine)
    if args.load:
        rows = load_processed_to_db(args.path, engine=engine)
        print(f"{rows} lignes inserees dans election_results.")
    else:
        print("Schema cree. Utilisez --load pour charger les donnees.")
src/db/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Database schema and ingestion utilities.
3
+ """
src/db/ingest.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import logging
5
+ from pathlib import Path
6
+ from typing import Dict, Iterable, Tuple
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ import sqlalchemy as sa
11
+ from sqlalchemy.dialects.postgresql import insert
12
+
13
+ from src.constants import CANDIDATE_CATEGORIES
14
+ from src.data import preprocess as preprocess_module
15
+ from src.db.schema import (
16
+ bureaux,
17
+ categories,
18
+ communes,
19
+ create_schema,
20
+ elections,
21
+ get_engine,
22
+ results_local,
23
+ results_national,
24
+ )
25
+ from src.features import build_features
26
+
27
+ LOGGER = logging.getLogger(__name__)
28
+ TARGET_COLS = [f"target_share_{c}" for c in CANDIDATE_CATEGORIES]
29
+ ID_COLS = ["commune_code", "code_bv", "election_type", "election_year", "round", "date_scrutin"]
30
+
31
+
32
def load_panel(input_path: Path) -> pd.DataFrame:
    """Read the harmonised panel, choosing parquet or ;-separated CSV by suffix."""
    if not input_path.exists():
        raise FileNotFoundError(f"Dataset panel introuvable : {input_path}")
    if input_path.suffix != ".parquet":
        return pd.read_csv(input_path, sep=";")
    return pd.read_parquet(input_path)
38
+
39
+
40
def ensure_panel_exists(panel_path: Path, elections_long_path: Path, mapping_path: Path) -> pd.DataFrame:
    """
    Return the panel, rebuilding it from upstream artefacts when missing.

    When *panel_path* is absent, the long-format dataset is itself regenerated
    if needed (preprocess of ``data/raw``), then the panel is rebuilt with
    ``build_features.build_panel`` before being loaded.
    """
    if panel_path.exists():
        return load_panel(panel_path)
    LOGGER.info("Panel manquant, tentative de reconstruction via preprocess + build_features.")
    if not elections_long_path.exists():
        preprocess_module.preprocess_all(Path("data/raw"), elections_long_path.parent, preprocess_module.DEFAULT_META_CONFIG)
    build_features.build_panel(elections_long_path, mapping_path, panel_path, csv_output=None)
    return load_panel(panel_path)
48
+
49
+
50
def check_mass(panel: pd.DataFrame, tolerance: float = 0.05) -> None:
    """Log a warning when a row's target shares do not sum to ~1 (within *tolerance*)."""
    totals = panel[TARGET_COLS].sum(axis=1)
    out_of_range = panel[(totals < (1 - tolerance)) | (totals > (1 + tolerance))]
    if not out_of_range.empty:
        LOGGER.warning("Somme des parts hors intervalle attendu pour %s lignes (tol=%s).", len(out_of_range), tolerance)
55
+
56
+
57
def melt_panel(panel: pd.DataFrame) -> pd.DataFrame:
    """Reshape the wide panel into long (ids + turnout, category, share) format."""
    melted = panel.melt(
        id_vars=ID_COLS + ["turnout_pct"],
        value_vars=TARGET_COLS,
        var_name="category",
        value_name="share",
    )
    melted["category"] = melted["category"].str.replace("target_share_", "", regex=False)
    return melted
61
+
62
+
63
def _upsert_simple(conn, table, rows: Iterable[dict], index_elements: Iterable[str]) -> None:
    """INSERT ... ON CONFLICT DO NOTHING a batch of referential rows.

    Materialises *rows* before anything else. The original built the statement
    with ``insert(table).values(list(rows))`` *before* checking emptiness, so
    an empty iterable still constructed a values() clause with no rows, and a
    generator argument left ``if rows:`` always truthy (a generator object is
    truthy even when exhausted).
    """
    materialized = list(rows)
    if not materialized:
        return
    stmt = insert(table).values(materialized)
    stmt = stmt.on_conflict_do_nothing(index_elements=list(index_elements))
    conn.execute(stmt)
68
+
69
+
70
def ingest(panel: pd.DataFrame, engine) -> None:
    """
    Load the harmonised panel into the normalized PostgreSQL schema.

    Runs inside a single transaction: ensures the schema exists, upserts the
    referential tables (categories, communes, bureaux, elections), then
    upserts bureau-level results and nationally aggregated references.
    Shares are stored as percentages (share * 100).
    """
    check_mass(panel)
    panel = panel.copy()
    # Missing round defaults to 1; dates stored as plain dates.
    panel["round"] = panel["round"].fillna(1).astype(int)
    panel["date_scrutin"] = pd.to_datetime(panel["date_scrutin"]).dt.date

    long_df = melt_panel(panel)
    # Keep only known categories; convert fractional shares to percentages.
    long_df = long_df[long_df["category"].isin(CANDIDATE_CATEGORIES)]
    long_df["share_pct"] = (long_df["share"].astype(float) * 100).round(6)

    with engine.begin() as conn:
        create_schema(conn)
        LOGGER.info("Schéma vérifié.")

        # --- referential tables -------------------------------------------
        _upsert_simple(conn, categories, [{"name": cat} for cat in CANDIDATE_CATEGORIES], ["name"])
        cat_map = dict(conn.execute(sa.select(categories.c.name, categories.c.id)))

        # Commune codes double as both the normalized name and the INSEE code.
        commune_rows = [
            {"name_normalized": code, "insee_code": code}
            for code in sorted(long_df["commune_code"].dropna().unique())
        ]
        _upsert_simple(conn, communes, commune_rows, ["insee_code"])
        commune_map = dict(conn.execute(sa.select(communes.c.insee_code, communes.c.id)))

        def bureau_code_only(code_bv: str) -> str:
            # "commune-bureau" composite ids keep only the bureau part.
            if "-" in str(code_bv):
                parts = str(code_bv).split("-", 1)
                return parts[1]
            return str(code_bv)

        bureau_rows = []
        for _, row in long_df.drop_duplicates(subset=["commune_code", "code_bv"]).iterrows():
            commune_id = commune_map.get(row["commune_code"])
            if commune_id is None:
                continue
            bureau_rows.append(
                {
                    "commune_id": commune_id,
                    "bureau_code": bureau_code_only(row["code_bv"]),
                    "bureau_label": None,
                }
            )
        _upsert_simple(conn, bureaux, bureau_rows, ["commune_id", "bureau_code"])
        bureau_map = {
            (commune_id, bureau_code): bureau_id
            for bureau_id, commune_id, bureau_code in conn.execute(
                sa.select(bureaux.c.id, bureaux.c.commune_id, bureaux.c.bureau_code)
            )
        }

        election_rows = []
        for _, row in panel.drop_duplicates(subset=["election_type", "election_year", "round"]).iterrows():
            election_rows.append(
                {
                    "election_type": row["election_type"],
                    "election_year": int(row["election_year"]),
                    "round": int(row["round"]) if not pd.isna(row["round"]) else None,
                    "date": row["date_scrutin"],
                }
            )
        _upsert_simple(conn, elections, election_rows, ["election_type", "election_year", "round"])
        # NULL rounds are keyed as round 1 for lookup purposes.
        election_map: Dict[Tuple[str, int, int], int] = {
            (etype, year, int(round_) if round_ is not None else 1): eid
            for eid, etype, year, round_ in conn.execute(
                sa.select(elections.c.id, elections.c.election_type, elections.c.election_year, elections.c.round)
            )
        }

        # --- bureau-level results -----------------------------------------
        local_rows = []
        for row in long_df.itertuples(index=False):
            commune_id = commune_map.get(row.commune_code)
            if commune_id is None:
                continue
            bureau_id = bureau_map.get((commune_id, bureau_code_only(row.code_bv)))
            election_id = election_map.get((row.election_type, int(row.election_year), int(row.round)))
            category_id = cat_map.get(row.category)
            # Skip rows whose referential lookups failed.
            if None in (bureau_id, election_id, category_id):
                continue
            turnout_pct = None if pd.isna(row.turnout_pct) else float(row.turnout_pct) * 100
            local_rows.append(
                {
                    "bureau_id": bureau_id,
                    "election_id": election_id,
                    "category_id": category_id,
                    "share_pct": None if pd.isna(row.share_pct) else float(row.share_pct),
                    "votes": None,
                    "expressed": None,
                    "turnout_pct": turnout_pct,
                }
            )
        if local_rows:
            stmt = insert(results_local).values(local_rows)
            stmt = stmt.on_conflict_do_update(
                index_elements=["bureau_id", "election_id", "category_id"],
                set_={
                    "share_pct": stmt.excluded.share_pct,
                    "votes": stmt.excluded.votes,
                    "expressed": stmt.excluded.expressed,
                    "turnout_pct": stmt.excluded.turnout_pct,
                },
            )
            conn.execute(stmt)
            LOGGER.info("Résultats locaux insérés/mis à jour : %s lignes", len(local_rows))

        # --- national aggregates ------------------------------------------
        # NOTE(review): the national share is the unweighted mean of bureau
        # shares, not a vote-weighted aggregate — confirm this is intended.
        nat_rows = []
        nat = (
            long_df.groupby(["election_type", "election_year", "round", "category"], as_index=False)
            .agg(share=("share_pct", "mean"))
            .rename(columns={"share": "share_pct"})
        )
        # Average turnout per scrutin
        turnout_nat = panel.groupby(["election_type", "election_year", "round"], as_index=False)["turnout_pct"].mean()
        nat = nat.merge(turnout_nat, on=["election_type", "election_year", "round"], how="left")

        for row in nat.itertuples(index=False):
            election_id = election_map.get((row.election_type, int(row.election_year), int(row.round)))
            category_id = cat_map.get(row.category)
            if None in (election_id, category_id):
                continue
            nat_rows.append(
                {
                    "election_id": election_id,
                    "category_id": category_id,
                    "share_pct": None if pd.isna(row.share_pct) else float(row.share_pct),
                    "votes": None,
                    "expressed": None,
                    "turnout_pct": None if pd.isna(row.turnout_pct) else float(row.turnout_pct * 100),
                }
            )
        if nat_rows:
            stmt = insert(results_national).values(nat_rows)
            stmt = stmt.on_conflict_do_update(
                index_elements=["election_id", "category_id"],
                set_={
                    "share_pct": stmt.excluded.share_pct,
                    "votes": stmt.excluded.votes,
                    "expressed": stmt.excluded.expressed,
                    "turnout_pct": stmt.excluded.turnout_pct,
                },
            )
            conn.execute(stmt)
            LOGGER.info("Référentiels nationaux insérés/mis à jour : %s lignes", len(nat_rows))
212
+
213
+
214
def parse_args() -> argparse.Namespace:
    """Parse CLI options: the panel path, plus fallback inputs used to rebuild it."""
    parser = argparse.ArgumentParser(description="Ingestion du panel harmonisé dans PostgreSQL.")
    parser.add_argument("--input", type=Path, default=Path("data/processed/panel.parquet"), help="Chemin vers le panel parquet.")
    parser.add_argument(
        "--elections-long",
        type=Path,
        default=Path("data/interim/elections_long.parquet"),
        help="Format long (fallback pour reconstruire le panel).",
    )
    parser.add_argument(
        "--mapping",
        type=Path,
        default=Path("data/mapping_candidats_blocs.csv"),
        help="Mapping nuance -> catégorie (fallback).",
    )
    return parser.parse_args()
230
+
231
+
232
def main() -> None:
    """Entry point: load (or rebuild) the panel, then ingest it into PostgreSQL."""
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    args = parse_args()
    panel = ensure_panel_exists(args.input, args.elections_long, args.mapping)
    engine = get_engine()
    ingest(panel, engine)


if __name__ == "__main__":
    main()
src/db/schema.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Optional
5
+
6
+ import sqlalchemy as sa
7
+ from sqlalchemy import Column, Date, Float, ForeignKey, Integer, MetaData, String, Table, UniqueConstraint
8
+ from sqlalchemy.engine import Engine
9
+
10
# Shared MetaData registry: every table below attaches to it, so that
# create_schema() can create the whole schema with one create_all() call.
metadata = MetaData()

# Commune referential, keyed by INSEE code.
communes = Table(
    "communes",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("name_normalized", String(255), nullable=True),
    Column("insee_code", String(12), nullable=False, unique=True, index=True),
)

# Polling stations ("bureaux de vote"), unique per (commune, bureau_code).
bureaux = Table(
    "bureaux",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("commune_id", Integer, ForeignKey("communes.id"), nullable=False),
    Column("bureau_code", String(32), nullable=False),
    Column("bureau_label", String(255), nullable=True),
    UniqueConstraint("commune_id", "bureau_code", name="uq_bureau_commune_code"),
)

# One row per scrutin, identified by (type, year, round).
elections = Table(
    "elections",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("election_type", String(32), nullable=False),
    Column("election_year", Integer, nullable=False),
    Column("round", Integer, nullable=True),
    Column("date", Date, nullable=True),
    UniqueConstraint("election_type", "election_year", "round", name="uq_election_unique"),
)

# Political category referential (populated from CANDIDATE_CATEGORIES at ingest time).
categories = Table(
    "categories",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("name", String(64), nullable=False, unique=True),
)

# Bureau-level results: one row per (bureau, election, category).
results_local = Table(
    "results_local",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("bureau_id", Integer, ForeignKey("bureaux.id"), nullable=False),
    Column("election_id", Integer, ForeignKey("elections.id"), nullable=False),
    Column("category_id", Integer, ForeignKey("categories.id"), nullable=False),
    Column("share_pct", Float, nullable=True),
    Column("votes", Float, nullable=True),
    Column("expressed", Float, nullable=True),
    Column("turnout_pct", Float, nullable=True),
    UniqueConstraint("bureau_id", "election_id", "category_id", name="uq_local_bureau_election_category"),
)

# National references: one row per (election, category).
results_national = Table(
    "results_national",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("election_id", Integer, ForeignKey("elections.id"), nullable=False),
    Column("category_id", Integer, ForeignKey("categories.id"), nullable=False),
    Column("share_pct", Float, nullable=True),
    Column("votes", Float, nullable=True),
    Column("expressed", Float, nullable=True),
    Column("turnout_pct", Float, nullable=True),
    UniqueConstraint("election_id", "category_id", name="uq_nat_election_category"),
)
74
+
75
+
76
+ def _build_url_from_env() -> Optional[str]:
77
+ user = os.getenv("DB_USER") or os.getenv("POSTGRES_USER")
78
+ password = os.getenv("DB_PASSWORD") or os.getenv("POSTGRES_PASSWORD")
79
+ host = os.getenv("DB_HOST", "localhost")
80
+ port = os.getenv("DB_PORT", os.getenv("POSTGRES_PORT", "5432"))
81
+ db_name = os.getenv("DB_NAME") or os.getenv("POSTGRES_DB")
82
+ if user and password and db_name:
83
+ return f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{db_name}"
84
+ return None
85
+
86
+
87
def get_engine(url: Optional[str] = None) -> Engine:
    """Resolve a connection URL (argument > DATABASE_URL > DB_*/POSTGRES_* parts) and build an engine."""
    resolved = url or os.getenv("DATABASE_URL") or _build_url_from_env()
    if not resolved:
        raise RuntimeError("DATABASE_URL or DB_* env vars must be set.")
    return sa.create_engine(resolved)
92
+
93
+
94
def create_schema(engine: Engine) -> None:
    """Create all tables declared on the module-level ``metadata`` if absent."""
    metadata.create_all(engine)
src/features/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ Feature engineering subpackage.
3
+ """
src/features/build_features.py ADDED
@@ -0,0 +1,570 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import logging
5
+ import re
6
+ import unicodedata
7
+ from functools import reduce
8
+ from pathlib import Path
9
+ from typing import Dict, Iterable, List
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ from src.constants import CANDIDATE_CATEGORIES
15
+
16
+ LOGGER = logging.getLogger(__name__)
17
+
18
+ INDEX_COLS = [
19
+ "commune_code",
20
+ "code_bv",
21
+ "election_type",
22
+ "election_year",
23
+ "round",
24
+ "date_scrutin",
25
+ ]
26
+
27
+ PRESIDENTIAL_NAME_TO_CATEGORY = {
28
+ "arthaud": "extreme_gauche",
29
+ "poutou": "extreme_gauche",
30
+ "melenchon": "gauche_dure",
31
+ "roussel": "gauche_dure",
32
+ "hidalgo": "gauche_modere",
33
+ "jadot": "gauche_modere",
34
+ "hamon": "gauche_modere",
35
+ "macron": "centre",
36
+ "lassalle": "centre",
37
+ "cheminade": "centre",
38
+ "pecresse": "droite_modere",
39
+ "fillon": "droite_modere",
40
+ "dupontaignan": "droite_dure",
41
+ "asselineau": "droite_dure",
42
+ "lepen": "extreme_droite",
43
+ "zemmour": "extreme_droite",
44
+ }
45
+
46
+ EUROPEAN_LIST_KEYWORDS: list[tuple[str, str]] = [
47
+ ("rassemblementnational", "extreme_droite"),
48
+ ("lepen", "extreme_droite"),
49
+ ("republiqueenmarche", "centre"),
50
+ ("renaissance", "centre"),
51
+ ("modem", "centre"),
52
+ ("franceinsoumise", "gauche_dure"),
53
+ ("lutteouvriere", "extreme_gauche"),
54
+ ("revolutionnairecommunistes", "extreme_gauche"),
55
+ ("communiste", "gauche_dure"),
56
+ ("deboutlafrance", "droite_dure"),
57
+ ("dupontaignan", "droite_dure"),
58
+ ("frexit", "droite_dure"),
59
+ ("patriotes", "droite_dure"),
60
+ ("uniondeladroite", "droite_modere"),
61
+ ("droiteetducentre", "droite_modere"),
62
+ ("printempseuropeen", "gauche_modere"),
63
+ ("generation", "gauche_modere"),
64
+ ("animaliste", "gauche_modere"),
65
+ ("ecolog", "gauche_modere"),
66
+ ("federaliste", "centre"),
67
+ ("pirate", "centre"),
68
+ ("citoyenseuropeens", "centre"),
69
+ ("leseuropeens", "centre"),
70
+ ("lesoubliesdeleurope", "centre"),
71
+ ("initiativecitoyenne", "centre"),
72
+ ("esperanto", "centre"),
73
+ ("europeauservicedespeuples", "droite_dure"),
74
+ ("franceroyale", "extreme_droite"),
75
+ ("pourleuropedesgens", "gauche_dure"),
76
+ ("allonsenfants", "droite_modere"),
77
+ ("alliancejaune", "centre"),
78
+ ("giletsjaunes", "centre"),
79
+ ]
80
+
81
+
82
+ def normalize_category(label: str | None) -> str | None:
83
+ if label is None:
84
+ return None
85
+ norm = str(label).strip().lower().replace(" ", "_").replace("-", "_")
86
+ synonyms = {
87
+ "doite_dure": "droite_dure",
88
+ "droite_moderee": "droite_modere",
89
+ "gauche_moderee": "gauche_modere",
90
+ "extreme_gauche": "extreme_gauche",
91
+ "extreme_droite": "extreme_droite",
92
+ "divers": None,
93
+ "gauche": "gauche_modere",
94
+ "droite": "droite_modere",
95
+ }
96
+ mapped = synonyms.get(norm, norm)
97
+ if mapped in CANDIDATE_CATEGORIES:
98
+ return mapped
99
+ return None
100
+
101
+
102
+ def _normalize_code_series(series: pd.Series) -> pd.Series:
103
+ return (
104
+ series.astype("string")
105
+ .str.strip()
106
+ .str.upper()
107
+ .replace({"NAN": pd.NA, "NONE": pd.NA, "": pd.NA, "<NA>": pd.NA})
108
+ )
109
+
110
+
111
+ def _normalize_person_name(value: str | None) -> str:
112
+ if value is None:
113
+ return ""
114
+ text = str(value).strip().lower()
115
+ if not text:
116
+ return ""
117
+ text = unicodedata.normalize("NFD", text)
118
+ text = "".join(ch for ch in text if unicodedata.category(ch) != "Mn")
119
+ return re.sub(r"[^a-z]", "", text)
120
+
121
+
122
def _category_from_name(name: str | None) -> str | None:
    """Map a presidential candidate name to its bloc via substring lookup."""
    normalized = _normalize_person_name(name)
    if not normalized:
        return None
    # First matching key wins (dict preserves insertion order).
    return next(
        (category for key, category in PRESIDENTIAL_NAME_TO_CATEGORY.items() if key in normalized),
        None,
    )
130
+
131
+
132
def _category_from_list_name(name: str | None) -> str | None:
    """Map a European-election list label to its bloc via keyword lookup."""
    normalized = _normalize_person_name(name)
    if not normalized:
        return None
    # Keywords are ordered: the first substring hit decides the category.
    return next(
        (category for key, category in EUROPEAN_LIST_KEYWORDS if key in normalized),
        None,
    )
140
+
141
+
142
def load_elections_long(path: Path, commune_code: str | None = None) -> pd.DataFrame:
    """Load the harmonised long-format election file and normalise its columns.

    Args:
        path: Parquet or ";"-separated CSV produced by the harmonisation step.
        commune_code: Optional INSEE commune code; when given, rows are
            filtered to that commune before unpivoting.

    Returns:
        A DataFrame with parsed dates, numeric vote counts, normalised codes,
        and the ``election_year``/``round``/``election_type`` aliases the
        downstream feature code expects.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    if not path.exists():
        raise FileNotFoundError(f"Fichier long introuvable : {path}")
    if path.suffix == ".parquet":
        df = pd.read_parquet(path)
    else:
        df = pd.read_csv(path, sep=";")
    df["date_scrutin"] = pd.to_datetime(df["date_scrutin"])
    # Fall back to the scrutin date's year when "annee" is missing/non-numeric.
    df["annee"] = pd.to_numeric(df["annee"], errors="coerce").fillna(df["date_scrutin"].dt.year)
    df["election_year"] = df["annee"]
    df["tour"] = pd.to_numeric(df["tour"], errors="coerce")
    df["round"] = df["tour"]
    for col in ["exprimes", "votants", "inscrits", "voix", "blancs", "nuls"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
    if "code_candidature" in df.columns:
        df["code_candidature"] = _normalize_code_series(df["code_candidature"])
    if "code_commune" in df.columns:
        # Strip a trailing ".0" left over from float-typed commune codes.
        df["code_commune"] = (
            df["code_commune"]
            .astype(str)
            .str.strip()
            .str.replace(r"\.0$", "", regex=True)
        )
    else:
        # Derive the commune code from the "commune-bureau" composite key.
        df["code_commune"] = df["code_bv"].astype(str).str.split("-").str[0]
    if commune_code is not None:
        df = df[df["code_commune"].astype(str) == str(commune_code)].copy()
    df = _unpivot_wide_candidates(df)
    # Re-normalise: unpivoting may have introduced new candidature codes.
    if "code_candidature" in df.columns:
        df["code_candidature"] = _normalize_code_series(df["code_candidature"])
    df["type_scrutin"] = df["type_scrutin"].str.lower()
    df["election_type"] = df["type_scrutin"]
    return df
176
+
177
+
178
def _unpivot_wide_candidates(df: pd.DataFrame) -> pd.DataFrame:
    """Melt "wide" result files (one column set per candidate) into long rows.

    Rows that carry no suffixed "Voix N" values are kept as-is after
    backfilling from the unsuffixed "Voix"/"Code Nuance"/"Nom" columns.
    Wide rows are exploded into one row per candidate index N, keeping only
    candidates with a strictly positive vote count.
    """
    df = df.copy()
    # Detect the wide layout: columns named "Voix 1", "Voix 2", ...
    voix_cols = [c for c in df.columns if re.match(r"^Voix \d+$", str(c))]
    if not voix_cols:
        return df
    wide_mask = df[voix_cols].notna().any(axis=1)

    def _fill_unsuffixed_rows(local: pd.DataFrame) -> pd.DataFrame:
        # Some datasets only expose unsuffixed columns (Voix, Code Nuance).
        if "voix" in local.columns and "Voix" in local.columns:
            missing_voix = local["voix"].isna() | (local["voix"] == 0)
            local.loc[missing_voix, "voix"] = pd.to_numeric(
                local.loc[missing_voix, "Voix"],
                errors="coerce",
            )
        if "code_candidature" in local.columns:
            if "Code Nuance" in local.columns:
                local["code_candidature"] = local["code_candidature"].fillna(local["Code Nuance"])
            if "Nuance" in local.columns:
                local["code_candidature"] = local["code_candidature"].fillna(local["Nuance"])
        if "nom_candidature" in local.columns:
            # Prefer "Prénom Nom" when both parts exist; fall back to "Nom".
            if "Nom" in local.columns and "Prénom" in local.columns:
                prenom = local["Prénom"].fillna("").astype(str).str.strip()
                nom = local["Nom"].fillna("").astype(str).str.strip()
                combined = (prenom + " " + nom).str.strip().replace("", pd.NA)
                local["nom_candidature"] = local["nom_candidature"].fillna(combined)
            elif "Nom" in local.columns:
                local["nom_candidature"] = local["nom_candidature"].fillna(local["Nom"])
        return local

    if not wide_mask.any():
        return _fill_unsuffixed_rows(df)

    def _indexed_cols(pattern: str) -> Dict[int, str]:
        # Map candidate index N -> column name for columns matching `pattern`.
        mapping: Dict[int, str] = {}
        for col in df.columns:
            match = re.match(pattern, str(col))
            if match:
                mapping[int(match.group(1))] = col
        return mapping

    voice_map = _indexed_cols(r"^Voix (\d+)$")
    code_map = _indexed_cols(r"^Code Nuance (\d+)$")
    nuance_map = _indexed_cols(r"^Nuance (\d+)$")
    # "Nuance N" is only a fallback when "Code Nuance N" is absent.
    for idx, col in nuance_map.items():
        code_map.setdefault(idx, col)
    # Treat the unsuffixed columns as candidate #1 when present.
    if "voix" in df.columns:
        voice_map.setdefault(1, "voix")
    if "code_candidature" in df.columns:
        code_map.setdefault(1, "code_candidature")

    # A single candidate slot means the data is effectively already long.
    if not any(idx > 1 for idx in voice_map):
        return df

    # Base columns = everything that is not candidate-specific.
    drop_cols = {c for c in df.columns if re.search(r"\s\d+$", str(c))}
    drop_cols.update({"voix", "code_candidature", "nom_candidature"})
    base_cols = [c for c in df.columns if c not in drop_cols]

    df_long = _fill_unsuffixed_rows(df[~wide_mask].copy())
    df_wide = df[wide_mask].copy()
    frames = []

    def _compose_nom(idx: int) -> pd.Series | None:
        # Best available label for candidate `idx`, in decreasing preference:
        # extended list label, abbreviated label, "Prénom Nom", single parts.
        series = pd.Series(pd.NA, index=df_wide.index, dtype="string")
        etendu_col = f"Libellé Etendu Liste {idx}"
        abrege_col = f"Libellé Abrégé Liste {idx}"
        nom_col = f"Nom {idx}"
        prenom_col = f"Prénom {idx}"

        if etendu_col in df_wide.columns:
            series = series.fillna(df_wide[etendu_col].astype("string"))
        if abrege_col in df_wide.columns:
            series = series.fillna(df_wide[abrege_col].astype("string"))
        if nom_col in df_wide.columns and prenom_col in df_wide.columns:
            prenom = df_wide[prenom_col].fillna("").astype(str).str.strip()
            nom = df_wide[nom_col].fillna("").astype(str).str.strip()
            combined = (prenom + " " + nom).str.strip().replace("", pd.NA)
            series = series.fillna(combined)
        elif nom_col in df_wide.columns:
            series = series.fillna(df_wide[nom_col].astype("string"))
        elif prenom_col in df_wide.columns:
            series = series.fillna(df_wide[prenom_col].astype("string"))
        # The unsuffixed name column only ever describes candidate #1.
        if idx == 1 and "nom_candidature" in df_wide.columns:
            series = series.fillna(df_wide["nom_candidature"].astype("string"))
        if series.isna().all():
            return None
        return series

    for idx in sorted(voice_map):
        voix_col = voice_map[idx]
        if voix_col not in df_wide.columns:
            continue
        temp = df_wide[base_cols].copy()
        temp["voix"] = df_wide[voix_col]
        # Candidate code: prefer "Code Nuance N", then "Nuance N".
        code_candidates = []
        if idx in code_map:
            code_candidates.append(code_map[idx])
        if idx in nuance_map and nuance_map[idx] not in code_candidates:
            code_candidates.append(nuance_map[idx])
        code_series = pd.Series(pd.NA, index=df_wide.index, dtype="string")
        for candidate in code_candidates:
            if candidate in df_wide.columns:
                code_series = code_series.fillna(df_wide[candidate])
        temp["code_candidature"] = code_series
        nom_series = _compose_nom(idx)
        if nom_series is not None:
            temp["nom_candidature"] = nom_series
        frames.append(temp)

    if not frames:
        return df
    wide_long = pd.concat(frames, ignore_index=True)
    wide_long["voix"] = pd.to_numeric(wide_long["voix"], errors="coerce")
    # Drop empty candidate slots (NaN or zero votes).
    wide_long = wide_long[wide_long["voix"].notna() & (wide_long["voix"] > 0)]
    return pd.concat([df_long, wide_long], ignore_index=True)
293
+
294
+
295
def _mapping_from_yaml(mapping_path: Path) -> pd.DataFrame:
    """Build the nuance->bloc mapping DataFrame from a YAML config.

    The YAML may provide inline entries ("mapping"), point to a base CSV
    ("base_mapping", resolved relative to the YAML file), and patch rows via
    "overrides" (matched on code_candidature; unknown codes are appended).

    Raises:
        RuntimeError: If PyYAML is not installed.
        ValueError: If the YAML root is not a dict.
    """
    try:
        # Imported lazily so the module works without PyYAML for CSV mappings.
        import yaml
    except Exception as exc:
        raise RuntimeError("PyYAML est requis pour charger un mapping YAML.") from exc
    raw = yaml.safe_load(mapping_path.read_text()) or {}
    if not isinstance(raw, dict):
        raise ValueError("Mapping YAML invalide: attendu un dictionnaire.")

    base_mapping = raw.get("base_mapping")
    mapping_entries = raw.get("mapping")
    overrides = raw.get("overrides", [])

    mapping = pd.DataFrame()
    if mapping_entries:
        mapping = pd.DataFrame(mapping_entries)
    elif base_mapping:
        base_path = Path(base_mapping)
        if not base_path.is_absolute():
            base_path = mapping_path.parent / base_path
        mapping = pd.read_csv(base_path, sep=";")
    else:
        # No source given: start from an empty frame with the expected schema.
        mapping = pd.DataFrame(columns=["code_candidature", "nom_candidature", "bloc_1", "bloc_2", "bloc_3"])

    if overrides:
        override_df = pd.DataFrame(overrides)
        if not override_df.empty:
            # A "blocs" list entry is split into the bloc_1..bloc_3 columns.
            if "blocs" in override_df.columns:
                blocs = override_df["blocs"].apply(lambda v: v if isinstance(v, list) else [])
                override_df["bloc_1"] = blocs.apply(lambda v: v[0] if len(v) > 0 else None)
                override_df["bloc_2"] = blocs.apply(lambda v: v[1] if len(v) > 1 else None)
                override_df["bloc_3"] = blocs.apply(lambda v: v[2] if len(v) > 2 else None)
                override_df = override_df.drop(columns=["blocs"])
            # Accept the short "code"/"nom" aliases used in override entries.
            if "code_candidature" not in override_df.columns and "code" in override_df.columns:
                override_df = override_df.rename(columns={"code": "code_candidature"})
            if "nom_candidature" not in override_df.columns and "nom" in override_df.columns:
                override_df = override_df.rename(columns={"nom": "nom_candidature"})

            # Normalise codes on both sides so the override match is exact.
            if "code_candidature" in mapping.columns:
                mapping["code_candidature"] = _normalize_code_series(mapping["code_candidature"])
            if "code_candidature" in override_df.columns:
                override_df["code_candidature"] = _normalize_code_series(override_df["code_candidature"])

            mapping = mapping.copy()
            for _, row in override_df.iterrows():
                code = row.get("code_candidature")
                if code is None:
                    continue
                # Update matching rows in place; append unknown codes.
                mask = mapping["code_candidature"] == code
                if mask.any():
                    for col in ["nom_candidature", "bloc_1", "bloc_2", "bloc_3"]:
                        if col in row and pd.notna(row[col]):
                            mapping.loc[mask, col] = row[col]
                else:
                    mapping = pd.concat([mapping, pd.DataFrame([row])], ignore_index=True)
    return mapping
351
+
352
+
353
def load_mapping(mapping_path: Path) -> pd.DataFrame:
    """Load the nuance->bloc mapping (YAML or ';'-separated CSV) and normalise it.

    Raises:
        FileNotFoundError: If the mapping file does not exist.
    """
    if not mapping_path.exists():
        raise FileNotFoundError(f"Mapping candidats/blocs manquant : {mapping_path}")
    if mapping_path.suffix in {".yml", ".yaml"}:
        mapping = _mapping_from_yaml(mapping_path)
    else:
        mapping = pd.read_csv(mapping_path, sep=";")
    if "code_candidature" in mapping.columns:
        mapping["code_candidature"] = _normalize_code_series(mapping["code_candidature"])
    # Canonicalise every bloc_* column onto the known category set.
    bloc_columns = [name for name in mapping.columns if name.startswith("bloc")]
    for bloc_col in bloc_columns:
        mapping[bloc_col] = mapping[bloc_col].apply(normalize_category)
    return mapping
366
+
367
+
368
def expand_by_category(elections_long: pd.DataFrame, mapping: pd.DataFrame) -> pd.DataFrame:
    """Explode each candidature row into one row per mapped political bloc.

    Votes are split evenly across a candidature's blocs. Rows whose code has
    no mapping fall back to name-based matching (presidential candidates,
    European list labels) and finally to the "centre" bloc so every vote is
    attributed somewhere.
    """
    df = elections_long.merge(mapping, on="code_candidature", how="left", suffixes=("", "_map"))
    records: list[dict] = []
    for row in df.itertuples(index=False):
        blocs = [getattr(row, col, None) for col in ["bloc_1", "bloc_2", "bloc_3"]]
        # NOTE(review): this filter keeps any non-None value (including NaN
        # floats); normalize_category() then maps unknown values to None.
        blocs = [normalize_category(b) for b in blocs if isinstance(b, str) or b is not None]
        blocs = [b for b in blocs if b is not None]
        voix = getattr(row, "voix", 0) or 0
        exprimes = getattr(row, "exprimes", np.nan)
        votants = getattr(row, "votants", np.nan)
        inscrits = getattr(row, "inscrits", np.nan)
        blancs = getattr(row, "blancs", np.nan)
        nuls = getattr(row, "nuls", np.nan)
        if not blocs:
            # Code-level mapping failed: try matching on candidate/list names.
            election_type = getattr(row, "election_type", None)
            if election_type == "presidentielles":
                nom = getattr(row, "nom_candidature", None)
                mapped = _category_from_name(nom)
                if mapped:
                    blocs = [mapped]
            elif election_type == "europeennes":
                nom = getattr(row, "nom_candidature", None)
                mapped = _category_from_list_name(nom)
                if mapped:
                    blocs = [mapped]
        if not blocs:
            # Explicit fallback: unmapped -> centre (avoids an empty panel).
            blocs = ["centre"]
        # Split the candidature's votes evenly between its blocs.
        part = voix / len(blocs) if len(blocs) > 0 else 0
        for bloc in blocs:
            records.append(
                {
                    "commune_code": getattr(row, "code_commune"),
                    "code_bv": getattr(row, "code_bv"),
                    "election_type": getattr(row, "election_type"),
                    "election_year": int(getattr(row, "election_year")),
                    "round": int(getattr(row, "round")) if not pd.isna(getattr(row, "round")) else None,
                    "date_scrutin": getattr(row, "date_scrutin"),
                    "category": bloc,
                    "voix_cat": part,
                    "exprimes": exprimes,
                    "votants": votants,
                    "inscrits": inscrits,
                    "blancs": blancs,
                    "nuls": nuls,
                }
            )
    return pd.DataFrame.from_records(records)
416
+
417
+
418
def aggregate_by_event(df: pd.DataFrame) -> pd.DataFrame:
    """Sum category votes per election event and derive share/turnout ratios.

    Vote counts are summed per (event, category); the event-level totals
    (exprimes, votants, ...) are taken with "max" since they repeat on every
    category row of the same event.
    """
    group_cols = INDEX_COLS + ["category"]
    agg = df.groupby(group_cols, as_index=False).agg(
        {
            "voix_cat": "sum",
            "exprimes": "max",
            "votants": "max",
            "inscrits": "max",
            "blancs": "max",
            "nuls": "max",
        }
    )
    # Guard divisions: a zero denominator yields NaN instead of inf.
    agg["share"] = agg["voix_cat"] / agg["exprimes"].replace(0, np.nan)
    safe_inscrits = agg["inscrits"].replace(0, np.nan)
    agg["turnout_pct"] = agg["votants"] / safe_inscrits
    agg["blancs_pct"] = agg["blancs"] / safe_inscrits
    agg["nuls_pct"] = agg["nuls"] / safe_inscrits
    return agg
437
+
438
+
439
def compute_national_reference(local: pd.DataFrame) -> pd.DataFrame:
    """Aggregate bureau-level results into per-election national references.

    Returns one row per (election_type, election_year, round, category) with
    the national vote share and turnout, NaN when the denominator is zero.
    """
    keys = ["election_type", "election_year", "round", "category"]
    totals = local.groupby(keys, as_index=False).agg(
        {
            "voix_cat": "sum",
            "exprimes": "sum",
            "votants": "sum",
            "inscrits": "sum",
        }
    )
    safe_exprimes = totals["exprimes"].replace(0, np.nan)
    safe_inscrits = totals["inscrits"].replace(0, np.nan)
    totals["share_nat"] = totals["voix_cat"] / safe_exprimes
    totals["turnout_nat"] = totals["votants"] / safe_inscrits
    return totals[keys + ["share_nat", "turnout_nat"]]
453
+
454
+
455
def add_lags(local: pd.DataFrame) -> pd.DataFrame:
    """Attach lagged shares and deviations-to-national per bureau and category.

    Rows are ordered chronologically first so every shift(k) looks k elections
    back, either across all election types ("any") or within the same type.
    """
    df = local.sort_values("date_scrutin").copy()
    any_keys = ["code_bv", "category"]
    type_keys = ["code_bv", "category", "election_type"]
    df["share_lag_any"] = df.groupby(any_keys)["share"].shift(1)
    df["share_lag2_any"] = df.groupby(any_keys)["share"].shift(2)
    df["share_lag_same_type"] = df.groupby(type_keys)["share"].shift(1)
    # Local over/under-performance versus the national share.
    df["dev_to_nat"] = df["share"] - df["share_nat"]
    df["dev_to_nat_lag_any"] = df.groupby(any_keys)["dev_to_nat"].shift(1)
    df["dev_to_nat_lag_same_type"] = df.groupby(type_keys)["dev_to_nat"].shift(1)
    # Swing = change between the two previous elections (lag1 - lag2).
    df["swing_any"] = df["share_lag_any"] - df["share_lag2_any"]
    return df
465
+
466
+
467
def _pivot_feature(df: pd.DataFrame, value_col: str, prefix: str) -> pd.DataFrame:
    """Pivot one metric to wide form: one `{prefix}{category}` column per bloc."""
    wide = df.pivot_table(index=INDEX_COLS, columns="category", values=value_col)
    # Keep only known categories and tag columns with the feature prefix.
    known = [cat for cat in wide.columns if cat in CANDIDATE_CATEGORIES]
    wide = wide[known]
    wide.columns = [f"{prefix}{cat}" for cat in wide.columns]
    return wide.reset_index()
473
+
474
+
475
def build_panel(
    elections_long_path: Path,
    mapping_path: Path,
    output_path: Path,
    *,
    csv_output: Path | None = None,
) -> pd.DataFrame:
    """Assemble the leakage-free features+targets panel and persist it.

    Pipeline: load long results, expand per bloc, aggregate per event, join
    national references, add lagged features, pivot to wide format, then
    renormalise target shares to sum to 1 before writing parquet (and an
    optional ';'-separated CSV).

    Returns:
        The assembled panel DataFrame.
    """
    elections_long = load_elections_long(elections_long_path)
    mapping = load_mapping(mapping_path)
    expanded = expand_by_category(elections_long, mapping)
    local = aggregate_by_event(expanded)

    nat = compute_national_reference(local)
    local = local.merge(nat, on=["election_type", "election_year", "round", "category"], how="left")
    local = add_lags(local)

    # Event-level turnout and its lagged versions (any type / same type).
    turnout_event = (
        local.groupby(INDEX_COLS, as_index=False)["turnout_pct"].max().sort_values("date_scrutin")
    )
    turnout_event["prev_turnout_any_lag1"] = turnout_event.groupby("code_bv")["turnout_pct"].shift(1)
    turnout_event["prev_turnout_same_type_lag1"] = turnout_event.groupby(["code_bv", "election_type"])[
        "turnout_pct"
    ].shift(1)

    # One wide frame per feature family, merged on the event index columns.
    datasets: List[pd.DataFrame] = [
        _pivot_feature(local, "share", "target_share_"),
        _pivot_feature(local, "share_lag_any", "prev_share_any_lag1_"),
        _pivot_feature(local, "share_lag_same_type", "prev_share_type_lag1_"),
        _pivot_feature(local, "dev_to_nat_lag_any", "prev_dev_to_national_any_lag1_"),
        _pivot_feature(local, "dev_to_nat_lag_same_type", "prev_dev_to_national_type_lag1_"),
        _pivot_feature(local, "swing_any", "swing_any_"),
    ]
    panel = reduce(lambda left, right: left.merge(right, on=INDEX_COLS, how="left"), datasets)
    panel = panel.merge(
        turnout_event[INDEX_COLS + ["turnout_pct", "prev_turnout_any_lag1", "prev_turnout_same_type_lag1"]],
        on=INDEX_COLS,
        how="left",
    )

    # Ensure every category target exists, clamp to [0, 1], then renormalise
    # rows with positive mass so their target shares sum to 1.
    target_cols = [f"target_share_{c}" for c in CANDIDATE_CATEGORIES]
    for col in target_cols:
        if col not in panel.columns:
            panel[col] = 0.0
    panel[target_cols] = panel[target_cols].fillna(0).clip(lower=0, upper=1)
    panel["target_sum_before_renorm"] = panel[target_cols].sum(axis=1)
    has_mass = panel["target_sum_before_renorm"] > 0
    panel.loc[has_mass, target_cols] = panel.loc[has_mass, target_cols].div(
        panel.loc[has_mass, "target_sum_before_renorm"], axis=0
    )
    panel["target_sum_after_renorm"] = panel[target_cols].sum(axis=1)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    panel.to_parquet(output_path, index=False)
    if csv_output:
        panel.to_csv(csv_output, sep=";", index=False)
    LOGGER.info("Panel enregistré dans %s (%s lignes)", output_path, len(panel))
    return panel
532
+
533
+
534
def parse_args() -> argparse.Namespace:
    """Parse the CLI options of the panel-building script."""
    parser = argparse.ArgumentParser(description="Construction du dataset panel features+cibles sans fuite temporelle.")
    # (flag, default path, help) — all options are Path-typed.
    path_options = [
        ("--elections-long", Path("data/interim/elections_long.parquet"), "Chemin du format long harmonisé."),
        ("--mapping", Path("config/nuances.yaml"), "Mapping nuance -> catégorie."),
        ("--output", Path("data/processed/panel.parquet"), "Destination du parquet panel."),
        ("--output-csv", Path("data/processed/panel.csv"), "Destination CSV optionnelle."),
    ]
    for flag, default, help_text in path_options:
        parser.add_argument(flag, type=Path, default=default, help=help_text)
    return parser.parse_args()
561
+
562
+
563
def main() -> None:
    """CLI entry point: configure logging, parse arguments, build the panel."""
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    args = parse_args()
    build_panel(args.elections_long, args.mapping, args.output, csv_output=args.output_csv)


if __name__ == "__main__":
    main()
src/model/predict.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Dict, List
8
+
9
+ import joblib
10
+ import numpy as np
11
+ import pandas as pd
12
+
13
+ from src.constants import CANDIDATE_CATEGORIES
14
+ from src.features.build_features import (
15
+ aggregate_by_event,
16
+ compute_national_reference,
17
+ expand_by_category,
18
+ load_elections_long,
19
+ load_mapping,
20
+ )
21
+
22
+ LOGGER = logging.getLogger(__name__)
23
+
24
+
25
+ def filter_history(df: pd.DataFrame, target_year: int, commune_code: str | None) -> pd.DataFrame:
26
+ df = df[df["annee"] < target_year]
27
+ if commune_code:
28
+ df = df[df["code_commune"] == commune_code]
29
+ return df
30
+
31
+
32
def build_feature_matrix(
    elections_long: pd.DataFrame,
    mapping: pd.DataFrame,
    target_type: str,
    target_year: int,
) -> pd.DataFrame:
    """Build one feature row per bureau for the target (future) election.

    Features mirror the training panel's lag features: the latest observed
    share / deviation-to-national per (bureau, category) across all election
    types and restricted to ``target_type``, the swing between the last two
    events, and the latest turnout figures.
    """
    expanded = expand_by_category(elections_long, mapping)
    local = aggregate_by_event(expanded)
    nat = compute_national_reference(local)
    local = local.merge(nat, on=["election_type", "election_year", "round", "category"], how="left")
    local["dev_to_nat"] = local["share"] - local["share_nat"]
    local = local.sort_values("date_scrutin")

    # Latest observed value per (bureau, category), all election types mixed.
    last_any_share = (
        local.sort_values("date_scrutin").groupby(["code_bv", "category"])["share"].last()
    )
    last_any_dev = (
        local.sort_values("date_scrutin").groupby(["code_bv", "category"])["dev_to_nat"].last()
    )
    # Same, restricted to the target election type.
    last_type_share = (
        local[local["election_type"] == target_type]
        .sort_values("date_scrutin")
        .groupby(["code_bv", "category"])["share"]
        .last()
    )
    last_type_dev = (
        local[local["election_type"] == target_type]
        .sort_values("date_scrutin")
        .groupby(["code_bv", "category"])["dev_to_nat"]
        .last()
    )
    # Swing between the last two elections, all types mixed.
    swing_any = (
        local.groupby(["code_bv", "category"])["share"]
        .apply(lambda s: s.iloc[-1] - s.iloc[-2] if len(s) >= 2 else np.nan)
        .rename("swing_any")
    )

    turnout_any = local.groupby("code_bv")["turnout_pct"].last()
    turnout_type = (
        local[local["election_type"] == target_type]
        .sort_values("date_scrutin")
        .groupby("code_bv")["turnout_pct"]
        .last()
    )

    bureaux = sorted(local["code_bv"].dropna().unique())
    records: List[dict] = []
    for code_bv in bureaux:
        record = {
            # code_bv is a "commune-bureau" composite key.
            "commune_code": str(code_bv).split("-")[0],
            "code_bv": code_bv,
            "election_type": target_type,
            "election_year": target_year,
            "round": 1,
            # Placeholder date for the future election (January 1st).
            "date_scrutin": f"{target_year}-01-01",
            "prev_turnout_any_lag1": turnout_any.get(code_bv, np.nan),
            "prev_turnout_same_type_lag1": turnout_type.get(code_bv, np.nan),
        }
        for cat in CANDIDATE_CATEGORIES:
            record[f"prev_share_any_lag1_{cat}"] = last_any_share.get((code_bv, cat), np.nan)
            record[f"prev_share_type_lag1_{cat}"] = last_type_share.get((code_bv, cat), np.nan)
            record[f"prev_dev_to_national_any_lag1_{cat}"] = last_any_dev.get((code_bv, cat), np.nan)
            record[f"prev_dev_to_national_type_lag1_{cat}"] = last_type_dev.get((code_bv, cat), np.nan)
            record[f"swing_any_{cat}"] = swing_any.get((code_bv, cat), np.nan)
        records.append(record)
    return pd.DataFrame.from_records(records)
99
+
100
+
101
def compute_references(local: pd.DataFrame, target_year: int) -> dict[str, dict[tuple[str, str], float]]:
    """Build reference shares keyed by (code_bv, category) for delta reporting.

    "leg": the latest legislative result strictly before ``target_year``;
    "mun2020": the 2020 municipal result.
    (Annotation corrected: inner dicts are keyed by (code_bv, category) tuples.)
    """
    refs: dict[str, dict[tuple[str, str], float]] = {}
    leg = (
        local[(local["election_type"] == "legislatives") & (local["election_year"] < target_year)]
        .sort_values("date_scrutin")
        .groupby(["code_bv", "category"])
        .last()
    )
    mun2020 = (
        local[(local["election_type"] == "municipales") & (local["election_year"] == 2020)]
        .sort_values("date_scrutin")
        .groupby(["code_bv", "category"])
        .last()
    )
    refs["leg"] = {(code_bv, cat): row["share"] for (code_bv, cat), row in leg.iterrows()}
    refs["mun2020"] = {(code_bv, cat): row["share"] for (code_bv, cat), row in mun2020.iterrows()}
    return refs
118
+
119
+
120
def load_feature_columns(path: Path, df: pd.DataFrame) -> List[str]:
    """Load the trained feature-column order from JSON, with a fallback.

    When the JSON file is missing, every column of ``df`` that is not an
    identifier is treated as a feature.
    """
    if path.exists():
        return json.loads(path.read_text())
    identifier_cols = {"commune_code", "code_bv", "election_type", "election_year", "round", "date_scrutin"}
    return [col for col in df.columns if col not in identifier_cols]
126
+
127
+
128
def predict(
    model_path: Path,
    feature_df: pd.DataFrame,
    feature_cols: List[str],
    refs: Dict[str, Dict[str, float]],
) -> pd.DataFrame:
    """Run the trained model and compute percent shares plus reference deltas.

    Args:
        model_path: Joblib file holding the fitted multi-output regressor.
        feature_df: One row per bureau, as built by ``build_feature_matrix``.
        feature_cols: Ordered feature columns the model was trained with.
        refs: Reference shares keyed by (code_bv, category), per reference name.

    Returns:
        One row per bureau with ``predicted_share_<cat>`` percentages and
        ``delta_leg_<cat>`` / ``delta_mun2020_<cat>`` deltas ("N/A" when no
        reference exists for that bureau/category).
    """
    model = joblib.load(model_path)
    # Work on a copy: aligning columns must not mutate the caller's frame.
    feature_df = feature_df.copy()
    # Align feature set with trained columns (add missing as NaN).
    for col in feature_cols:
        if col not in feature_df.columns:
            feature_df[col] = np.nan
    preds = np.clip(model.predict(feature_df[feature_cols]), 0, 1)
    # Renormalise each row to sum to 1 (guarding all-zero rows), then to %.
    sums = preds.sum(axis=1, keepdims=True)
    sums[sums == 0] = 1
    preds_pct = preds / sums * 100

    rows = []
    # BUGFIX: enumerate row positions explicitly. The original indexed the
    # prediction array with the DataFrame label from iterrows(), which is
    # only correct for a default 0..n-1 index and breaks after filtering.
    for pos, (_, row) in enumerate(feature_df.iterrows()):
        code_bv = row["code_bv"]
        record = {
            "commune_code": row["commune_code"],
            "code_bv": code_bv,
        }
        for cat_idx, cat in enumerate(CANDIDATE_CATEGORIES):
            pred_val = preds_pct[pos, cat_idx]
            record[f"predicted_share_{cat}"] = round(float(pred_val), 2)
            leg_ref = refs["leg"].get((code_bv, cat))
            mun_ref = refs["mun2020"].get((code_bv, cat))
            record[f"delta_leg_{cat}"] = "N/A" if leg_ref is None else round(float(pred_val - leg_ref * 100), 2)
            record[f"delta_mun2020_{cat}"] = "N/A" if mun_ref is None else round(float(pred_val - mun_ref * 100), 2)
        rows.append(record)
    return pd.DataFrame(rows)
162
+
163
+
164
def main() -> None:
    """CLI entry point: build features from history and write bureau predictions."""
    parser = argparse.ArgumentParser(description="Prédictions bureau par bureau pour une échéance cible.")
    parser.add_argument("--model-path", type=Path, default=Path("models/hist_gradient_boosting.joblib"), help="Modèle entraîné.")
    parser.add_argument("--feature-columns", type=Path, default=Path("models/feature_columns.json"), help="Colonnes de features attendues.")
    parser.add_argument("--elections-long", type=Path, default=Path("data/interim/elections_long.parquet"), help="Historique long.")
    parser.add_argument("--mapping", type=Path, default=Path("config/nuances.yaml"), help="Mapping nuances->catégories.")
    parser.add_argument("--target-election-type", type=str, default="municipales", help="Type d'élection cible.")
    parser.add_argument("--target-year", type=int, default=2026, help="Année cible.")
    parser.add_argument("--commune-code", type=str, default="34301", help="Code commune à filtrer (Sete=34301).")
    parser.add_argument("--output-dir", type=Path, default=Path("predictions"), help="Répertoire de sortie.")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

    # Only data strictly before the target year feeds the features (no leakage).
    elections_long = load_elections_long(args.elections_long)
    elections_long = filter_history(elections_long, args.target_year, args.commune_code)
    mapping = load_mapping(args.mapping)

    feature_df = build_feature_matrix(elections_long, mapping, args.target_election_type, args.target_year)
    if feature_df.empty:
        raise RuntimeError("Aucune donnée historique disponible pour construire les features.")
    feature_cols = load_feature_columns(args.feature_columns, feature_df)
    # Reference shares (latest legislatives / municipales 2020) for the deltas.
    refs = compute_references(
        aggregate_by_event(expand_by_category(elections_long, mapping)).assign(
            election_type=lambda d: d["election_type"]
        ),
        args.target_year,
    )
    preds_df = predict(args.model_path, feature_df, feature_cols, refs)

    args.output_dir.mkdir(parents=True, exist_ok=True)
    output_path = args.output_dir / f"pred_{args.target_election_type}_{args.target_year}_sete.csv"
    preds_df.to_csv(output_path, index=False)
    LOGGER.info("Prédictions écrites dans %s", output_path)


if __name__ == "__main__":
    main()
src/model/train.py ADDED
@@ -0,0 +1,666 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import logging
6
+ import sys
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Dict, List, Optional, Tuple
10
+
11
+ import joblib
12
+ import numpy as np
13
+ import pandas as pd
14
+ from sklearn.compose import ColumnTransformer
15
+ from sklearn.base import BaseEstimator, RegressorMixin, clone
16
+ from sklearn.ensemble import HistGradientBoostingClassifier, HistGradientBoostingRegressor
17
+ from sklearn.impute import SimpleImputer
18
+ from sklearn.linear_model import Ridge
19
+ from sklearn.metrics import (
20
+ explained_variance_score,
21
+ mean_absolute_error,
22
+ mean_squared_error,
23
+ median_absolute_error,
24
+ r2_score,
25
+ )
26
+ from sklearn.model_selection import TimeSeriesSplit
27
+ from sklearn.multioutput import MultiOutputRegressor
28
+ from sklearn.pipeline import Pipeline
29
+ from sklearn.preprocessing import StandardScaler
30
+ from sklearn.utils.validation import check_is_fitted
31
+
32
+ # Ensure project root is on sys.path when running as a script
33
+ PROJECT_ROOT = Path(__file__).resolve().parents[2]
34
+ if str(PROJECT_ROOT) not in sys.path:
35
+ sys.path.append(str(PROJECT_ROOT))
36
+
37
+ from src.constants import CANDIDATE_CATEGORIES
38
+
39
+ LOGGER = logging.getLogger(__name__)
40
+
41
# One regression target per political bloc.
TARGET_COLS = [f"target_share_{c}" for c in CANDIDATE_CATEGORIES]
# Identifier / bookkeeping columns that must never be fed to the model.
META_COLS = [
    "commune_code",
    "code_bv",
    "election_type",
    "election_year",
    "round",
    "date_scrutin",
    "target_sum_before_renorm",
    "target_sum_after_renorm",
]

# Hyper-parameter candidates explored per model family during selection.
MODEL_GRIDS: Dict[str, List[Dict[str, object]]] = {
    "ridge": [
        {"alpha": 0.1},
        {"alpha": 1.0},
        {"alpha": 10.0},
        {"alpha": 50.0},
    ],
    "hist_gradient_boosting": [
        {"max_depth": 3, "learning_rate": 0.08, "max_iter": 400, "min_samples_leaf": 30, "l2_regularization": 0.1},
        {"max_depth": 4, "learning_rate": 0.05, "max_iter": 600, "min_samples_leaf": 20, "l2_regularization": 0.1},
        {"max_depth": 4, "learning_rate": 0.1, "max_iter": 300, "min_samples_leaf": 50, "l2_regularization": 1.0},
        {"max_depth": 6, "learning_rate": 0.05, "max_iter": 500, "min_samples_leaf": 40, "l2_regularization": 0.5},
        {"max_depth": 3, "learning_rate": 0.05, "max_iter": 500, "min_samples_leaf": 80, "l2_regularization": 1.0},
        {"max_depth": 3, "learning_rate": 0.04, "max_iter": 600, "min_samples_leaf": 120, "l2_regularization": 2.0},
        {"max_depth": 2, "learning_rate": 0.08, "max_iter": 500, "min_samples_leaf": 150, "l2_regularization": 3.0},
    ],
    "lightgbm": [
        {"n_estimators": 600, "learning_rate": 0.05, "num_leaves": 31, "subsample": 0.8, "colsample_bytree": 0.8},
        {"n_estimators": 400, "learning_rate": 0.08, "num_leaves": 16, "min_child_samples": 30, "subsample": 0.7, "colsample_bytree": 0.7},
    ],
    "xgboost": [
        {"n_estimators": 600, "learning_rate": 0.05, "max_depth": 6, "subsample": 0.8, "colsample_bytree": 0.8},
        {"n_estimators": 400, "learning_rate": 0.08, "max_depth": 4, "subsample": 0.7, "colsample_bytree": 0.7},
    ],
    # Two-stage HGB: a classifier decides presence, a regressor predicts the
    # (optionally logit-transformed) share; probabilities may gate the output.
    "two_stage_hgb": [
        {
            "clf_params": {"max_depth": 3, "learning_rate": 0.08, "max_iter": 300, "min_samples_leaf": 30, "l2_regularization": 0.1},
            "reg_params": {"max_depth": 3, "learning_rate": 0.08, "max_iter": 400, "min_samples_leaf": 30, "l2_regularization": 0.1},
            "epsilon": 1e-4,
            "use_logit": True,
            "use_proba": True,
        },
        {
            "clf_params": {"max_depth": 2, "learning_rate": 0.1, "max_iter": 300, "min_samples_leaf": 60, "l2_regularization": 0.2},
            "reg_params": {"max_depth": 2, "learning_rate": 0.08, "max_iter": 500, "min_samples_leaf": 60, "l2_regularization": 0.5},
            "epsilon": 1e-4,
            "use_logit": True,
            "use_proba": True,
        },
    ],
    "catboost": [
        {"depth": 6, "learning_rate": 0.05, "iterations": 500},
        {"depth": 4, "learning_rate": 0.08, "iterations": 400},
    ],
}
+ }
98
+
99
+
100
@dataclass
class SplitConfig:
    """Year boundaries defining the temporal train/valid/test split."""

    # Last election year included in the training set.
    train_end_year: int
    # Last election year included in the validation set.
    valid_end_year: int
    # First election year of the test set (inclusive).
    test_start_year: int
105
+
106
+
107
def load_panel(path: Path) -> pd.DataFrame:
    """Read the modelling panel from disk.

    ``.parquet`` files are read natively; any other extension is treated as a
    ';'-separated CSV. ``election_year`` and ``round`` are coerced to numeric
    (invalid values become NaN).

    Raises FileNotFoundError when the file does not exist.
    """
    if not path.exists():
        raise FileNotFoundError(f"Panel introuvable : {path}")
    if path.suffix == ".parquet":
        panel = pd.read_parquet(path)
    else:
        panel = pd.read_csv(path, sep=";")
    for column in ("election_year", "round"):
        panel[column] = pd.to_numeric(panel[column], errors="coerce")
    return panel
117
+
118
+
119
def get_feature_columns(df: pd.DataFrame) -> List[str]:
    """Return the numeric, non-target, non-metadata columns usable as features."""
    reserved = set(TARGET_COLS) | set(META_COLS)
    return [
        col
        for col in df.columns
        if col not in reserved and pd.api.types.is_numeric_dtype(df[col])
    ]
124
+
125
+
126
def temporal_split(df: pd.DataFrame, cfg: SplitConfig) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Split the panel by election_year into (train, valid, test) frames.

    Train covers years <= train_end_year, valid the half-open range
    (train_end_year, valid_end_year], test everything >= test_start_year.
    """
    years = df["election_year"]
    train = df[years <= cfg.train_end_year]
    valid = df[years.between(cfg.train_end_year, cfg.valid_end_year, inclusive="right")]
    test = df[years >= cfg.test_start_year]
    return train, valid, test
131
+
132
+
133
def make_preprocessor(feature_cols: List[str]) -> ColumnTransformer:
    """Build the numeric preprocessing step: median imputation + scaling.

    Columns outside ``feature_cols`` are dropped (remainder="drop").
    """
    numeric_pipeline = Pipeline(
        steps=[
            ("imputer", SimpleImputer(strategy="median")),
            ("scaler", StandardScaler()),
        ]
    )
    return ColumnTransformer(
        transformers=[("num", numeric_pipeline, feature_cols)],
        remainder="drop",
    )
140
+
141
+
142
def normalize_predictions(y_pred: np.ndarray) -> np.ndarray:
    """Clip predicted shares to [0, 1] and renormalise each row to sum to 1.

    Rows whose clipped values sum to 0 are left as all zeros rather than
    dividing by zero.
    """
    clipped = np.clip(y_pred, 0, 1)
    row_sums = clipped.sum(axis=1, keepdims=True)
    # Dividing an all-zero row by 1 leaves it unchanged (all zeros).
    safe_sums = np.where(row_sums == 0, 1, row_sums)
    return clipped / safe_sums
147
+
148
+
149
def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> Dict[str, float]:
    """Compute share-regression metrics on renormalised predictions.

    ``y_pred`` is clipped/renormalised (normalize_predictions) before any
    scoring, so all metrics describe the post-processed model output.
    Global metrics are computed on the flattened (sample x category) values;
    per-category MAE is appended as ``mae_<category>``. ``winner_accuracy``
    is the share of rows where argmax matches between truth and prediction.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    y_pred = normalize_predictions(y_pred)
    y_true_flat = y_true.reshape(-1)
    y_pred_flat = y_pred.reshape(-1)
    mae = float(mean_absolute_error(y_true_flat, y_pred_flat))
    rmse = float(np.sqrt(mean_squared_error(y_true_flat, y_pred_flat)))
    medae = float(median_absolute_error(y_true_flat, y_pred_flat))
    # R2 / explained variance are undefined on a single value -> NaN.
    r2 = float(r2_score(y_true_flat, y_pred_flat)) if len(y_true_flat) > 1 else np.nan
    evs = float(explained_variance_score(y_true_flat, y_pred_flat)) if len(y_true_flat) > 1 else np.nan
    denom = float(np.sum(np.abs(y_true_flat)))
    # WAPE is undefined when all true values are 0 -> NaN.
    wape = float(np.sum(np.abs(y_true_flat - y_pred_flat)) / denom) if denom > 0 else np.nan
    # 1e-9 guards the sMAPE denominator against 0/0 rows.
    smape = float(np.mean(2 * np.abs(y_pred_flat - y_true_flat) / (np.abs(y_true_flat) + np.abs(y_pred_flat) + 1e-9)))
    # Positive bias means the model over-predicts on average.
    bias = float(np.mean(y_pred_flat - y_true_flat))
    winner_true = np.argmax(y_true, axis=1)
    winner_pred = np.argmax(y_pred, axis=1)
    winner_acc = float(np.mean(winner_true == winner_pred)) if len(winner_true) else np.nan
    metrics = {
        "mae_mean": mae,
        "rmse": rmse,
        "medae": medae,
        "r2": r2,
        "explained_var": evs,
        "wape": wape,
        "smape": smape,
        "bias": bias,
        "winner_accuracy": winner_acc,
    }
    # Per-category MAE, column order follows CANDIDATE_CATEGORIES.
    for idx, cat in enumerate(CANDIDATE_CATEGORIES):
        metrics[f"mae_{cat}"] = float(mean_absolute_error(y_true[:, idx], y_pred[:, idx]))
    return metrics
181
+
182
+
183
def build_event_folds(df: pd.DataFrame, n_splits: int) -> List[Tuple[np.ndarray, np.ndarray]]:
    """Build expanding-window CV folds grouped by election event.

    An "event" is a (election_type, election_year, round) triple; events are
    ordered chronologically by ``date_scrutin`` (falling back to the election
    year when all dates are missing) and split with TimeSeriesSplit, so a
    fold never trains on events later than its test events.

    Returns a list of (train_idx, test_idx) pairs of **positional** indices,
    suitable for ``df.iloc``, or [] when fewer than two events exist.
    """
    if df.empty:
        return []
    # Work on a positionally re-indexed copy: callers consume the returned
    # indices with .iloc, so they must be positions. The previous version
    # returned ``work.index`` labels, which was only correct when df happened
    # to carry a default RangeIndex.
    work = df.reset_index(drop=True)
    work["date_scrutin"] = pd.to_datetime(work.get("date_scrutin"), errors="coerce")
    if work["date_scrutin"].isna().all():
        # No usable dates at all: order events by election year instead.
        work["date_scrutin"] = pd.to_datetime(work["election_year"], format="%Y", errors="coerce")
    work["event_key"] = (
        work["election_type"].astype(str).str.lower().str.strip()
        + "|"
        + work["election_year"].astype(str)
        + "|"
        + work["round"].astype(str)
    )
    events = (
        work[["event_key", "date_scrutin"]]
        .dropna(subset=["event_key", "date_scrutin"])
        .drop_duplicates()
        .sort_values("date_scrutin")
        .reset_index(drop=True)
    )
    if len(events) < 2:
        return []
    # Never request more splits than there are candidate held-out events.
    max_splits = min(n_splits, len(events) - 1)
    tscv = TimeSeriesSplit(n_splits=max_splits)
    folds = []
    for train_evt_idx, test_evt_idx in tscv.split(events):
        train_keys = set(events.iloc[train_evt_idx]["event_key"])
        test_keys = set(events.iloc[test_evt_idx]["event_key"])
        train_idx = np.flatnonzero(work["event_key"].isin(train_keys).to_numpy())
        test_idx = np.flatnonzero(work["event_key"].isin(test_keys).to_numpy())
        folds.append((train_idx, test_idx))
    return folds
216
+
217
+
218
class TwoStageRegressor(BaseEstimator, RegressorMixin):
    """Hurdle-style regressor for a single share target.

    Stage 1 (classifier) predicts whether the target is positive
    (> ``epsilon``); stage 2 (regressor) is fitted on positive samples only,
    optionally in logit space. At prediction time the stages are combined
    either multiplicatively (``use_proba=True``: P(positive) * regressed
    value) or by hard thresholding at ``positive_threshold``.

    Targets are treated as proportions: with ``use_logit`` they are clipped
    into (logit_eps, 1 - logit_eps) before the logit transform.
    """

    def __init__(
        self,
        classifier: Optional[BaseEstimator] = None,
        regressor: Optional[BaseEstimator] = None,
        epsilon: float = 1e-4,
        positive_threshold: float = 0.5,
        use_proba: bool = True,
        use_logit: bool = True,
        logit_eps: float = 1e-6,
    ) -> None:
        # sklearn convention: store params as-is so clone()/get_params() work.
        self.classifier = classifier
        self.regressor = regressor
        self.epsilon = epsilon
        self.positive_threshold = positive_threshold
        self.use_proba = use_proba
        self.use_logit = use_logit
        self.logit_eps = logit_eps

    def _default_classifier(self) -> BaseEstimator:
        # Fallback stage-1 model when none is supplied.
        return HistGradientBoostingClassifier(random_state=42)

    def _default_regressor(self) -> BaseEstimator:
        # Fallback stage-2 model when none is supplied.
        return HistGradientBoostingRegressor(random_state=42)

    def fit(self, X, y):
        """Fit the classifier on (y > epsilon) and the regressor on positives."""
        y = np.asarray(y).ravel()
        mask_pos = y > self.epsilon

        # Degenerate case: every sample on the same side -> nothing for the
        # classifier to learn; remember the constant positive rate instead.
        self._constant_proba = None
        if mask_pos.all() or (~mask_pos).all():
            self._constant_proba = float(mask_pos.mean())
            self.classifier_ = None
        else:
            classifier = self.classifier if self.classifier is not None else self._default_classifier()
            self.classifier_ = clone(classifier)
            self.classifier_.fit(X, mask_pos.astype(int))

        self.regressor_ = None
        if mask_pos.any():
            regressor = self.regressor if self.regressor is not None else self._default_regressor()
            self.regressor_ = clone(regressor)
            y_reg = y[mask_pos]
            if self.use_logit:
                # Regress in logit space; the clip keeps log() finite at 0/1.
                y_reg = np.clip(y_reg, self.logit_eps, 1 - self.logit_eps)
                y_reg = np.log(y_reg / (1 - y_reg))
            self.regressor_.fit(X[mask_pos], y_reg)
        return self

    def predict(self, X):
        """Combine the stage-1 probability with the stage-2 regression output.

        Requires a prior call to fit() (``_constant_proba`` is set there).
        """
        if self._constant_proba is not None:
            proba = np.full(len(X), self._constant_proba, dtype=float)
        else:
            check_is_fitted(self, ["classifier_"])
            if self.use_proba and hasattr(self.classifier_, "predict_proba"):
                proba = self.classifier_.predict_proba(X)[:, 1]  # type: ignore
            else:
                # Classifier without predict_proba: fall back to hard labels.
                proba = self.classifier_.predict(X)  # type: ignore
        proba = np.asarray(proba, dtype=float)

        if self.regressor_ is None:
            # No positive sample seen during fit -> predict 0 everywhere.
            reg_pred = np.zeros(len(proba), dtype=float)
        else:
            reg_pred = np.asarray(self.regressor_.predict(X), dtype=float)
            if self.use_logit:
                # Invert the logit transform applied in fit().
                reg_pred = 1 / (1 + np.exp(-reg_pred))
            reg_pred = np.clip(reg_pred, 0, 1)

        if self.use_proba:
            preds = proba * reg_pred
        else:
            preds = np.where(proba >= self.positive_threshold, reg_pred, 0.0)
        return preds
291
+
292
+
293
class CatBoostRegressorWrapper(BaseEstimator, RegressorMixin):
    """Thin sklearn-compatible wrapper around ``catboost.CatBoostRegressor``.

    CatBoost is imported lazily inside ``fit`` so the module loads without
    the dependency installed. ``get_params``/``set_params`` expose the raw
    CatBoost keyword arguments so sklearn cloning works with CatBoost
    versions lacking the modern tag API.
    """

    def __init__(self, **params: float | int | str):
        self.params = dict(params)
        self.model_ = None

    def fit(self, X, y, **fit_params):
        from catboost import CatBoostRegressor

        booster = CatBoostRegressor(**self.params)  # type: ignore
        booster.fit(X, y, **fit_params)
        self.model_ = booster
        return self

    def predict(self, X):
        """Predict with the fitted booster; raises if fit() was never called."""
        if self.model_ is None:
            raise ValueError("CatBoostRegressorWrapper n'est pas entraîné.")
        return self.model_.predict(X)

    def get_params(self, deep: bool = True):
        # Return a copy so callers cannot mutate our stored params.
        return dict(self.params)

    def set_params(self, **params):
        self.params.update(params)
        return self
316
+
317
+
318
def make_model(model_name: str, feature_cols: List[str], params: Dict[str, object]) -> Optional[Pipeline]:
    """Build the preprocessing + multi-output regression pipeline for ``model_name``.

    Optional backends (lightgbm, xgboost, catboost) are probed with a local
    import; when the library is missing the function logs and returns None so
    callers can simply skip that grid entry. Raises ValueError for an unknown
    model name.
    """
    preprocessor = make_preprocessor(feature_cols)
    if model_name == "ridge":
        estimator = Ridge(**params)  # type: ignore
    elif model_name == "hist_gradient_boosting":
        estimator = HistGradientBoostingRegressor(random_state=42, **params)  # type: ignore
    elif model_name == "lightgbm":
        try:
            from lightgbm import LGBMRegressor
        except Exception:
            LOGGER.info("LightGBM indisponible, ignoré.")
            return None
        estimator = LGBMRegressor(random_state=42, force_row_wise=True, verbosity=-1, **params)  # type: ignore
    elif model_name == "xgboost":
        try:
            from xgboost import XGBRegressor
        except Exception:
            LOGGER.info("XGBoost indisponible, ignoré.")
            return None
        estimator = XGBRegressor(random_state=42, **params)
    elif model_name == "two_stage_hgb":
        # Grid entries for this model nest the per-stage parameter dicts.
        clf_params = params.get("clf_params", {})
        reg_params = params.get("reg_params", {})
        estimator = TwoStageRegressor(
            classifier=HistGradientBoostingClassifier(random_state=42, **clf_params),  # type: ignore
            regressor=HistGradientBoostingRegressor(random_state=42, **reg_params),  # type: ignore
            epsilon=params.get("epsilon", 1e-4),  # type: ignore
            positive_threshold=params.get("positive_threshold", 0.5),  # type: ignore
            use_proba=bool(params.get("use_proba", True)),
            use_logit=bool(params.get("use_logit", True)),
            logit_eps=params.get("logit_eps", 1e-6),  # type: ignore
        )
    elif model_name == "catboost":
        try:
            from catboost import CatBoostRegressor
        except Exception:
            LOGGER.info("CatBoost indisponible, ignoré.")
            return None
        # Older CatBoost versions lack __sklearn_tags__ and break when cloned
        # by sklearn; route those through the compatibility wrapper.
        if not hasattr(CatBoostRegressor, "__sklearn_tags__"):
            estimator = CatBoostRegressorWrapper(verbose=0, random_state=42, **params)  # type: ignore
        else:
            estimator = CatBoostRegressor(verbose=0, random_state=42, **params)  # type: ignore
    else:
        raise ValueError(f"Modèle inconnu: {model_name}")
    # n_jobs=1 to avoid process-based parallelism issues in some environments.
    model = MultiOutputRegressor(estimator, n_jobs=1)  # type: ignore
    return Pipeline(
        steps=[
            ("preprocess", preprocessor),
            ("model", model),
        ]
    )
370
+
371
+
372
def evaluate(model: Pipeline, X, y_true: np.ndarray) -> Dict[str, float]:
    """Score ``model`` on (X, y_true); returns {"mae_mean": nan} for empty X."""
    if X is None or len(X) == 0:
        return {"mae_mean": np.nan}
    predictions = model.predict(X)
    return regression_metrics(y_true, predictions)  # type: ignore
377
+
378
+
379
def evaluate_cv(
    model: Pipeline,
    df: pd.DataFrame,
    feature_cols: List[str],
    n_splits: int,
    target_cols: List[str],
) -> Dict[str, float]:
    """Run event-wise temporal cross-validation and average fold metrics.

    Each fold refits a clone of ``model``. Metric names are returned with a
    ``cv_`` prefix plus a ``folds_used`` count; {"folds_used": 0} when no
    fold can be built.
    """
    folds = build_event_folds(df, n_splits)
    if not folds:
        return {"folds_used": 0}
    collected: Dict[str, list[float]] = {}
    for fit_idx, holdout_idx in folds:
        fold_model = clone(model)
        fold_model.fit(df.iloc[fit_idx][feature_cols], df.iloc[fit_idx][target_cols].values)
        fold_scores = evaluate(
            fold_model,
            df.iloc[holdout_idx][feature_cols],
            df.iloc[holdout_idx][target_cols].values,
        )
        for metric_name, value in fold_scores.items():
            collected.setdefault(metric_name, []).append(value)
    summary = {f"cv_{name}": float(np.nanmean(values)) for name, values in collected.items()}
    summary["folds_used"] = len(folds)
    return summary
403
+
404
+
405
def compute_cv_residual_intervals(
    model: Pipeline,
    df: pd.DataFrame,
    feature_cols: List[str],
    target_cols: List[str],
    n_splits: int,
    quantiles: Tuple[float, ...] = (0.05, 0.1, 0.9, 0.95),
) -> Dict[str, object]:
    """Estimate per-category residual quantiles via event-wise temporal CV.

    For each fold a clone of ``model`` is refitted and out-of-fold residuals
    (normalised prediction minus truth) are pooled per candidate category;
    the requested quantiles plus mean/std/count are then summarised.
    Returns an empty "residuals" payload when no folds can be built.
    """
    folds = build_event_folds(df, n_splits)
    if not folds:
        return {"folds_used": 0, "quantiles": list(quantiles), "residuals": {}}

    residuals_by_cat: Dict[str, list[float]] = {cat: [] for cat in CANDIDATE_CATEGORIES}
    for train_idx, test_idx in folds:
        model_clone = clone(model)
        X_train = df.iloc[train_idx][feature_cols]
        y_train = df.iloc[train_idx][target_cols].values
        X_test = df.iloc[test_idx][feature_cols]
        y_test = df.iloc[test_idx][target_cols].values
        model_clone.fit(X_train, y_train)
        y_pred = model_clone.predict(X_test)
        # Residuals are measured on the renormalised (deliverable) output.
        y_pred = normalize_predictions(y_pred)
        resid = y_pred - y_test
        for idx, cat in enumerate(CANDIDATE_CATEGORIES):
            residuals_by_cat[cat].extend(resid[:, idx].tolist())

    # e.g. 0.05 -> "q05", 0.95 -> "q95".
    quantile_keys = [f"q{int(q * 100):02d}" for q in quantiles]
    summary: Dict[str, Dict[str, float]] = {}
    for cat, values in residuals_by_cat.items():
        arr = np.asarray(values, dtype=float)
        if arr.size == 0:
            continue
        q_vals = np.quantile(arr, quantiles).tolist()
        entry = {key: float(val) for key, val in zip(quantile_keys, q_vals)}
        entry["mean"] = float(np.mean(arr))
        entry["std"] = float(np.std(arr))
        entry["n"] = int(arr.size)
        summary[cat] = entry

    return {
        "folds_used": len(folds),
        "quantiles": list(quantiles),
        "residuals": summary,
    }
449
+
450
+
451
def add_cv_selection_helpers(cv_summary: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the CV summary with model-selection helper columns.

    Adds ``worst_block_mae`` (max of the per-category cv_mae_* columns) and
    ``bias_abs`` (absolute cv_bias) when the source columns exist.
    """
    enriched = cv_summary.copy()
    per_block_cols = [
        col
        for col in enriched.columns
        if col.startswith("cv_mae_") and col != "cv_mae_mean"
    ]
    if per_block_cols:
        enriched["worst_block_mae"] = enriched[per_block_cols].max(axis=1)
    if "cv_bias" in enriched.columns:
        enriched["bias_abs"] = enriched["cv_bias"].abs()
    return enriched
459
+
460
+
461
def select_best_model(cv_summary: pd.DataFrame) -> Tuple[str, Dict[str, object]]:
    """Pick the best (model_name, params) row from the CV summary.

    Rows with |cv_bias| <= 0.02 are preferred when any exist; candidates are
    then ranked ascending on cv_mae_mean, worst_block_mae, bias_abs, cv_rmse,
    cv_smape (whichever columns are present). Raises RuntimeError on an
    empty summary.
    """
    if cv_summary.empty:
        raise RuntimeError("Aucun modèle évalué.")
    ranked = add_cv_selection_helpers(cv_summary)
    candidates = ranked
    if "bias_abs" in ranked.columns:
        low_bias = ranked[ranked["bias_abs"] <= 0.02]
        if not low_bias.empty:
            candidates = low_bias
    priority = ["cv_mae_mean", "worst_block_mae", "bias_abs", "cv_rmse", "cv_smape"]
    sort_cols = [col for col in priority if col in candidates.columns]
    winner = candidates.sort_values(sort_cols, na_position="last").iloc[0]
    return str(winner["model"]), dict(winner["params"])
474
+
475
+
476
+ def save_metrics(
477
+ metrics: Dict[str, Dict[str, Dict[str, float]]],
478
+ output_dir: Path,
479
+ cv_summary: pd.DataFrame | None = None,
480
+ ) -> None:
481
+ output_dir.mkdir(parents=True, exist_ok=True)
482
+ with (output_dir / "metrics.json").open("w", encoding="utf-8") as f:
483
+ json.dump(metrics, f, indent=2)
484
+
485
+ if cv_summary is not None and not cv_summary.empty:
486
+ cv_summary.to_csv(output_dir / "cv_summary.csv", index=False)
487
+ lines = ["# Métriques (parts, 0-1)\n"]
488
+ for model_name, splits in metrics.items():
489
+ lines.append(f"## {model_name}")
490
+ for split, vals in splits.items():
491
+ lines.append(
492
+ f"- {split} mae_mean: {vals.get('mae_mean', float('nan')):.4f}, "
493
+ f"rmse: {vals.get('rmse', float('nan')):.4f}, "
494
+ f"wape: {vals.get('wape', float('nan')):.4f}, "
495
+ f"winner_acc: {vals.get('winner_accuracy', float('nan')):.3f}"
496
+ )
497
+ lines.append("")
498
+ (output_dir / "metrics.md").write_text("\n".join(lines), encoding="utf-8")
499
+
500
+
501
def save_model_card(
    model_name: str,
    cfg: SplitConfig,
    feature_cols: List[str],
    metrics: Dict[str, Dict[str, Dict[str, float]]],
    output_dir: Path,
) -> None:
    """Write a short markdown model card (split, features, valid/test MAE)."""
    valid_mae = metrics[model_name]["valid"].get("mae_mean", float("nan"))
    test_mae = metrics[model_name]["test"].get("mae_mean", float("nan"))
    card = [
        "# Model card",
        f"- Modèle: {model_name}",
        f"- Split temporel: train<= {cfg.train_end_year}, valid<= {cfg.valid_end_year}, test>= {cfg.test_start_year}",
        f"- Features: {len(feature_cols)} colonnes numériques (lags, écarts national, swing, turnout)",
        "- Cibles: parts par bloc (7 catégories) renormalisées.",
        "- Métriques principales (MAE moyen, jeux valid/test):",
        f"  - Valid: {valid_mae:.4f}",
        f"  - Test: {test_mae:.4f}",
    ]
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "model_card.md").write_text("\n".join(card), encoding="utf-8")
520
+
521
+
522
def plot_mae_per_category(model_name: str, mae_scores: Dict[str, float], output_dir: Path) -> None:
    """Save a bar chart of per-category MAE under ``output_dir``.

    Silently skips (with a warning) when matplotlib is unavailable or any
    per-category score is missing from ``mae_scores``.
    """
    try:
        import matplotlib.pyplot as plt
    except Exception:
        LOGGER.warning("Matplotlib indisponible, skip figure.")
        return
    expected_keys = [f"mae_{c}" for c in CANDIDATE_CATEGORIES]
    if any(key not in mae_scores for key in expected_keys):
        LOGGER.warning("Scores MAE par categorie indisponibles, skip figure.")
        return
    values = [mae_scores[key] for key in expected_keys]
    plt.figure(figsize=(8, 4))
    plt.bar(CANDIDATE_CATEGORIES, values, color="#2c7fb8")
    plt.xticks(rotation=30, ha="right")
    plt.ylabel("MAE (part)")
    plt.title(f"MAE par catégorie - {model_name}")
    output_dir.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(output_dir / "mae_per_category.png")
    plt.close()
542
+
543
+
544
def main() -> None:
    """CLI entry point: load the panel, tune via temporal CV, evaluate the
    best model on the temporal split, and persist model + reports."""
    # --- CLI arguments -------------------------------------------------
    parser = argparse.ArgumentParser(description="Entraînement et évaluation temporelle multi-blocs.")
    parser.add_argument("--panel", type=Path, default=Path("data/processed/panel.parquet"), help="Dataset panel parquet.")
    parser.add_argument("--models-dir", type=Path, default=Path("models"), help="Répertoire de sauvegarde des modèles.")
    parser.add_argument("--reports-dir", type=Path, default=Path("reports"), help="Répertoire de sortie des rapports.")
    parser.add_argument("--train-end-year", type=int, default=2019, help="Dernière année incluse dans le train.")
    parser.add_argument("--valid-end-year", type=int, default=2021, help="Dernière année incluse dans la validation.")
    parser.add_argument("--test-start-year", type=int, default=2022, help="Première année du test (inclusif).")
    parser.add_argument("--cv-splits", type=int, default=4, help="Nombre de folds temporels pour la CV par scrutin.")
    parser.add_argument("--no-tune", action="store_true", help="Désactiver la recherche d'hyperparamètres.")
    parser.add_argument("--max-trials", type=int, default=0, help="Limiter le nombre d'essais par modèle (0=all).")
    parser.add_argument(
        "--models",
        nargs="+",
        default=list(MODEL_GRIDS.keys()),
        help="Liste des modèles à tester (ridge, hist_gradient_boosting, lightgbm, xgboost, two_stage_hgb, catboost).",
    )
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    cfg = SplitConfig(train_end_year=args.train_end_year, valid_end_year=args.valid_end_year, test_start_year=args.test_start_year)

    # --- Data loading & feature selection ------------------------------
    panel = load_panel(args.panel)
    # Rows without complete targets cannot be scored -> drop them up front.
    panel = panel.dropna(subset=TARGET_COLS)
    feature_cols = get_feature_columns(panel)
    all_na = [c for c in feature_cols if panel[c].isna().all()]
    if all_na:
        LOGGER.warning("Features supprimées car entièrement NA: %s", all_na)
        feature_cols = [c for c in feature_cols if c not in all_na]

    train_df, valid_df, test_df = temporal_split(panel, cfg)
    # train+valid (everything before the test years) is used for CV tuning
    # and for the final refit; reset_index keeps positions aligned with the
    # fold indices consumed via .iloc.
    train_valid_df = panel[panel["election_year"] < cfg.test_start_year].copy().reset_index(drop=True)

    models_to_run = [m for m in args.models if m in MODEL_GRIDS]
    if not models_to_run:
        raise RuntimeError("Aucun modèle demandé n'est reconnu.")

    # --- Hyper-parameter search via event-wise temporal CV -------------
    cv_rows: List[Dict[str, object]] = []
    if not args.no_tune:
        rng = np.random.default_rng(42)
        for model_name in models_to_run:
            grid = MODEL_GRIDS[model_name]
            # --max-trials subsamples the grid reproducibly (seeded RNG).
            if args.max_trials and len(grid) > args.max_trials:
                indices = rng.choice(len(grid), size=args.max_trials, replace=False)
                grid = [grid[i] for i in indices]
            for params in grid:
                model = make_model(model_name, feature_cols, params)
                if model is None:
                    # Backend not installed -> skip this grid entry.
                    continue
                cv_metrics = evaluate_cv(model, train_valid_df, feature_cols, args.cv_splits, TARGET_COLS)
                row = {"model": model_name, "params": params, **cv_metrics}
                cv_rows.append(row)

    cv_summary = pd.DataFrame(cv_rows)
    if not cv_summary.empty:
        cv_summary = cv_summary.dropna(subset=["cv_mae_mean"])
        cv_summary = add_cv_selection_helpers(cv_summary)
    if not cv_summary.empty:
        best_model_name, best_params = select_best_model(cv_summary)
        LOGGER.info("Meilleur modèle CV: %s %s", best_model_name, best_params)
    else:
        # No CV results (e.g. --no-tune): fall back to the first requested
        # model with its first grid entry.
        best_model_name = models_to_run[0]
        best_params = MODEL_GRIDS[best_model_name][0]
        LOGGER.warning("Pas de CV disponible, fallback sur %s %s", best_model_name, best_params)

    # --- Residual-based uncertainty intervals (out-of-fold) ------------
    residual_payload = {}
    model_for_intervals = make_model(best_model_name, feature_cols, best_params)
    if model_for_intervals is not None and not train_valid_df.empty:
        residual_payload = compute_cv_residual_intervals(
            model_for_intervals,
            train_valid_df,
            feature_cols,
            TARGET_COLS,
            args.cv_splits,
        )
    if residual_payload.get("residuals"):
        args.reports_dir.mkdir(parents=True, exist_ok=True)
        (args.reports_dir / "residual_intervals.json").write_text(
            json.dumps(
                {
                    "model": best_model_name,
                    **residual_payload,
                },
                indent=2,
            ),
            encoding="utf-8",
        )

    # --- Final evaluation on the temporal split ------------------------
    X_train, y_train = train_df[feature_cols], train_df[TARGET_COLS].values
    X_valid, y_valid = valid_df[feature_cols], valid_df[TARGET_COLS].values
    X_test, y_test = test_df[feature_cols], test_df[TARGET_COLS].values
    X_train_valid, y_train_valid = train_valid_df[feature_cols], train_valid_df[TARGET_COLS].values

    eval_results: Dict[str, Dict[str, Dict[str, float]]] = {}
    # Fit on train only so valid/test remain honest hold-outs.
    best_model_eval = make_model(best_model_name, feature_cols, best_params)
    if best_model_eval is None:
        raise RuntimeError(f"Modèle indisponible: {best_model_name}")
    best_model_eval.fit(X_train, y_train)
    eval_results[best_model_name] = {
        "train": evaluate(best_model_eval, X_train, y_train),
        "valid": evaluate(best_model_eval, X_valid, y_valid),
        "test": evaluate(best_model_eval, X_test, y_test),
        "train_valid": evaluate(best_model_eval, X_train_valid, y_train_valid),
    }

    # The shipped model is refitted on train+valid for maximal data usage.
    best_model_final = make_model(best_model_name, feature_cols, best_params)
    if best_model_final is None:
        raise RuntimeError(f"Modèle indisponible: {best_model_name}")
    best_model_final.fit(X_train_valid, y_train_valid)

    # --- Artifact persistence ------------------------------------------
    args.models_dir.mkdir(parents=True, exist_ok=True)
    joblib.dump(best_model_final, args.models_dir / f"{best_model_name}.joblib")
    LOGGER.info("Modèle sauvegardé dans %s", args.models_dir / f"{best_model_name}.joblib")
    (args.models_dir / "feature_columns.json").write_text(json.dumps(feature_cols, indent=2), encoding="utf-8")
    (args.models_dir / "best_model.json").write_text(json.dumps({"name": best_model_name}, indent=2), encoding="utf-8")

    save_metrics(eval_results, args.reports_dir, cv_summary=cv_summary)
    plot_mae_per_category(best_model_name, eval_results[best_model_name]["test"], args.reports_dir / "figures")
    save_model_card(best_model_name, cfg, feature_cols, eval_results, args.models_dir)
664
+
665
# Script entry point: run the full train/evaluate/export pipeline.
if __name__ == "__main__":
    main()
src/pipeline.py ADDED
@@ -0,0 +1,435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Iterable, Mapping, Optional
5
+
6
+ import pandas as pd
7
+ import re
8
+ import yaml
9
+
10
+ from .constants import CANDIDATE_CATEGORIES
11
+
12
+
13
def normalize_bloc(bloc: str | None) -> str:
    """Map a free-form bloc label onto one of the canonical project categories.

    Labels are lowercased and normalised to snake_case before a synonym
    lookup; anything unknown (or None) falls back to "centre".
    """
    if bloc is None:
        return "centre"
    label = str(bloc).strip().lower().replace(" ", "_").replace("-", "_")
    # NOTE: "doite_dure" appears to be a deliberate mapping for a known typo
    # of "droite_dure" in upstream data — confirm before removing.
    synonyms = {
        "droite_moderee": "droite_modere",
        "gauche_moderee": "gauche_modere",
        "doite_dure": "droite_dure",
        "gauche": "gauche_modere",
        "droite": "droite_modere",
        "divers": "centre",
        "divers_droite": "droite_modere",
        "divers_gauche": "gauche_modere",
        "divers_centre": "centre",
        "extreme_gauche": "extreme_gauche",
        "extreme_droite": "extreme_droite",
    }
    label = synonyms.get(label, label)
    return label if label in CANDIDATE_CATEGORIES else "centre"
37
+
38
+
39
# Default communes configuration: <repo root>/config/communes.yaml
# (this module lives one level below the repository root).
DEFAULT_COMMUNES_PATH = (Path(__file__).resolve().parents[1] / "config" / "communes.yaml")
40
+
41
+
42
+ def _normalize_insee_code(value: str | int | None) -> str:
43
+ if value is None:
44
+ return ""
45
+ cleaned = (
46
+ str(value)
47
+ .strip()
48
+ .replace(".0", "")
49
+ )
50
+ cleaned = re.sub(r"\D", "", cleaned)
51
+ if not cleaned:
52
+ return ""
53
+ if len(cleaned) >= 5:
54
+ return cleaned[:5]
55
+ return cleaned.zfill(5)
56
+
57
+
58
def load_target_communes(path: Path = DEFAULT_COMMUNES_PATH) -> dict[str, str]:
    """Load the {insee_code: name} mapping of target communes from YAML.

    Accepts either a mapping {code: name} (optionally nested under a
    top-level "communes" key) or a list whose entries are bare codes or
    dicts with code_insee/code/insee and nom/name keys. Codes are normalised
    to 5-digit strings; entries with no usable code are skipped.

    Raises FileNotFoundError when the file is missing and ValueError when
    the YAML structure is neither a list nor a mapping.
    """
    if not path.exists():
        raise FileNotFoundError(f"Fichier communes introuvable: {path}")
    # Explicit UTF-8: read_text() without an encoding uses the platform
    # default, which is not reliable for accented commune names; the rest of
    # the project reads/writes text with encoding="utf-8".
    raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    entries = raw.get("communes", raw) if isinstance(raw, dict) else raw
    communes: dict[str, str] = {}

    if isinstance(entries, dict):
        for code, name in entries.items():
            norm = _normalize_insee_code(code)
            if norm:
                communes[norm] = str(name) if name is not None else ""
        return communes

    if not isinstance(entries, list):
        raise ValueError("Format YAML invalide: attendu une liste ou un mapping sous 'communes'.")

    for entry in entries:
        if isinstance(entry, str):
            norm = _normalize_insee_code(entry)
            if norm:
                communes[norm] = ""
            continue
        if isinstance(entry, dict):
            code = entry.get("code_insee") or entry.get("code") or entry.get("insee")
            name = entry.get("nom") or entry.get("name") or ""
            norm = _normalize_insee_code(code)
            if norm:
                communes[norm] = str(name) if name is not None else ""
    return communes
89
+
90
+
91
def load_elections_long(path: Path) -> pd.DataFrame:
    """Load the harmonised long-format election dataset.

    ``.parquet`` files are read natively; anything else is treated as a
    ';'-separated CSV. ``date_scrutin`` is parsed to datetime, count columns
    are coerced to numeric (invalid -> NaN) and missing ``voix`` become 0.
    """
    if path.suffix == ".parquet":
        data = pd.read_parquet(path)
    else:
        data = pd.read_csv(path, sep=";")
    data["date_scrutin"] = pd.to_datetime(data["date_scrutin"])
    for column in ("exprimes", "inscrits", "votants", "voix", "blancs", "nuls"):
        if column in data.columns:
            data[column] = pd.to_numeric(data[column], errors="coerce")
    data["voix"] = data["voix"].fillna(0)
    return data
106
+
107
+
108
def _mapping_from_yaml(path: Path) -> pd.DataFrame:
    """Build the candidature -> bloc mapping DataFrame from a YAML file.

    The YAML may provide rows inline under ``mapping``, point at a base CSV
    via ``base_mapping`` (relative paths resolved against the YAML file's
    directory), or neither (empty frame). ``overrides`` entries then update
    matching ``code_candidature`` rows column by column, or are appended
    when no match exists. An override's ``blocs`` list is expanded into the
    bloc_1..bloc_3 columns.
    """
    # Local import guard: redundant with the module-level `import yaml` but
    # turns a missing PyYAML into an explicit RuntimeError here.
    try:
        import yaml
    except Exception as exc:
        raise RuntimeError("PyYAML est requis pour charger un mapping YAML.") from exc
    raw = yaml.safe_load(path.read_text()) or {}
    if not isinstance(raw, dict):
        raise ValueError("Mapping YAML invalide: attendu un dictionnaire.")

    base_mapping = raw.get("base_mapping")
    mapping_entries = raw.get("mapping")
    overrides = raw.get("overrides", [])

    # Priority: inline `mapping` rows > `base_mapping` CSV > empty skeleton.
    mapping = pd.DataFrame()
    if mapping_entries:
        mapping = pd.DataFrame(mapping_entries)
    elif base_mapping:
        base_path = Path(base_mapping)
        if not base_path.is_absolute():
            # Relative base paths are resolved next to the YAML file.
            base_path = path.parent / base_path
        mapping = pd.read_csv(base_path, sep=";")
    else:
        mapping = pd.DataFrame(columns=["code_candidature", "nom_candidature", "bloc_1", "bloc_2", "bloc_3"])

    if overrides:
        override_df = pd.DataFrame(overrides)
        if not override_df.empty:
            # A `blocs: [a, b, c]` list is spread over bloc_1..bloc_3
            # (missing positions become None).
            if "blocs" in override_df.columns:
                blocs = override_df["blocs"].apply(lambda v: v if isinstance(v, list) else [])
                override_df["bloc_1"] = blocs.apply(lambda v: v[0] if len(v) > 0 else None)
                override_df["bloc_2"] = blocs.apply(lambda v: v[1] if len(v) > 1 else None)
                override_df["bloc_3"] = blocs.apply(lambda v: v[2] if len(v) > 2 else None)
                override_df = override_df.drop(columns=["blocs"])
            # Accept the short aliases `code` / `nom` in override entries.
            if "code_candidature" not in override_df.columns and "code" in override_df.columns:
                override_df = override_df.rename(columns={"code": "code_candidature"})
            if "nom_candidature" not in override_df.columns and "nom" in override_df.columns:
                override_df = override_df.rename(columns={"nom": "nom_candidature"})

            # String-typed keys on both sides so the equality match is exact.
            mapping = mapping.copy()
            if "code_candidature" in mapping.columns:
                mapping["code_candidature"] = mapping["code_candidature"].astype(str)
            if "code_candidature" in override_df.columns:
                override_df["code_candidature"] = override_df["code_candidature"].astype(str)

            for _, row in override_df.iterrows():
                code = row.get("code_candidature")
                if code is None:
                    continue
                if "code_candidature" in mapping.columns:
                    mask = mapping["code_candidature"] == code
                else:
                    mask = pd.Series([False] * len(mapping))
                if mask.any():
                    # Update only the non-NaN override columns of the match.
                    for col in ["nom_candidature", "bloc_1", "bloc_2", "bloc_3"]:
                        if col in row and pd.notna(row[col]):
                            mapping.loc[mask, col] = row[col]
                else:
                    # Unknown code: append the override as a new row.
                    mapping = pd.concat([mapping, pd.DataFrame([row])], ignore_index=True)
    return mapping
167
+
168
+
169
def load_bloc_mapping(path: Path) -> pd.DataFrame:
    """Load the candidature -> bloc mapping from YAML or ';'-separated CSV.

    Bloc label columns are normalised once here so downstream code only
    ever sees canonical category names.
    """
    if path.suffix in {".yml", ".yaml"}:
        mapping = _mapping_from_yaml(path)
    else:
        mapping = pd.read_csv(path, sep=";")
    for bloc_col in ("bloc_1", "bloc_2", "bloc_3"):
        if bloc_col in mapping.columns:
            mapping[bloc_col] = mapping[bloc_col].apply(normalize_bloc)
    return mapping
179
+
180
+
181
def expand_voix_by_bloc(elections_long: pd.DataFrame, mapping: pd.DataFrame) -> pd.DataFrame:
    """
    Distribute voix of each candidature across its mapped blocs.

    Each candidature row is joined to its bloc mapping and its ``voix`` are
    split evenly between its 1-3 blocs; candidatures with no mapped bloc are
    counted in the ``centre`` bloc. Returns one row per
    (bureau, scrutin, bloc) with derived participation ratios.

    Fix: the original crashed with a KeyError on empty input because
    ``DataFrame.from_records([])`` has no columns; the columns are now
    declared explicitly so an empty frame keeps the expected schema.
    """
    df = elections_long.merge(mapping, on="code_candidature", how="left")
    carried = ["code_bv", "nom_bv", "date_scrutin", "annee", "type_scrutin", "tour"]
    numeric = ["exprimes", "inscrits", "votants", "blancs", "nuls"]
    records: list[dict] = []
    for _, row in df.iterrows():
        blocs = [row.get("bloc_1"), row.get("bloc_2"), row.get("bloc_3")]
        blocs = [normalize_bloc(b) for b in blocs if isinstance(b, str) and b]
        if not blocs:
            # Unmapped candidatures default to the centre bloc.
            blocs = ["centre"]
        voix = row.get("voix", 0) or 0
        repartition = voix / len(blocs)  # even split across blocs
        for bloc in blocs:
            record = {col: row.get(col) for col in carried}
            record["bloc"] = bloc
            record["voix_bloc"] = repartition
            record.update({col: row.get(col) for col in numeric})
            records.append(record)
    columns = carried + ["bloc", "voix_bloc"] + numeric
    # Explicit columns keep the frame well-formed even when records is empty.
    result = pd.DataFrame.from_records(records, columns=columns)
    result["date_scrutin"] = pd.to_datetime(result["date_scrutin"])
    for col in ["voix_bloc"] + numeric:
        result[col] = pd.to_numeric(result[col], errors="coerce")
    result["part_bloc"] = result["voix_bloc"] / result["exprimes"]
    # Avoid division by zero: bureaux with 0 inscrits yield <NA> ratios.
    base_inscrits = result["inscrits"].replace(0, pd.NA)
    result["taux_participation_bv"] = result["votants"] / base_inscrits
    result["taux_blancs_bv"] = result["blancs"] / base_inscrits
    result["taux_nuls_bv"] = result["nuls"] / base_inscrits
    return result
223
+
224
+
225
def compute_national_reference(elections_blocs: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate national part/participation per date & bloc if no external national file is provided.
    """
    agg_cols = ["voix_bloc", "exprimes", "votants", "inscrits"]
    nat = elections_blocs.groupby(["date_scrutin", "bloc"], as_index=False)[agg_cols].sum()
    nat = nat.rename(columns={col: f"{col}_nat" for col in agg_cols})
    # Zero denominators become <NA> instead of raising / producing inf.
    exprimes = nat["exprimes_nat"].replace(0, pd.NA)
    inscrits = nat["inscrits_nat"].replace(0, pd.NA)
    nat["part_bloc_national"] = nat["voix_bloc_nat"] / exprimes
    nat["taux_participation_national"] = nat["votants_nat"] / inscrits
    return nat[["date_scrutin", "bloc", "part_bloc_national", "taux_participation_national"]]
237
+
238
+
239
def attach_national_results(
    elections_blocs: pd.DataFrame,
    resultats_nationaux: Optional[pd.DataFrame] = None,
) -> pd.DataFrame:
    """
    Merge national reference scores if provided; otherwise, compute them from the full dataset.
    """
    if resultats_nationaux is None:
        reference = compute_national_reference(elections_blocs)
    else:
        reference = resultats_nationaux.copy()
        reference["date_scrutin"] = pd.to_datetime(reference["date_scrutin"])

    merged = elections_blocs.merge(reference, on=["date_scrutin", "bloc"], how="left")
    # Local-vs-national gaps used as modelling features downstream.
    merged["ecart_bloc_vs_national"] = merged["part_bloc"] - merged["part_bloc_national"]
    merged["ecart_participation_vs_nat"] = (
        merged["taux_participation_bv"] - merged["taux_participation_national"]
    )
    return merged
260
+
261
+
262
def compute_population_growth(elections_blocs: pd.DataFrame, base_year: int = 2014) -> pd.DataFrame:
    """Attach per-bureau inscrit growth relative to *base_year*.

    The mean inscrits per (bureau, year) is compared to the same bureau's
    mean in *base_year*; bureaux absent in the base year get <NA>.
    """
    mean_inscrits = elections_blocs.groupby(["code_bv", "annee"], as_index=False)["inscrits"].mean()
    baseline = mean_inscrits.loc[mean_inscrits["annee"] == base_year, ["code_bv", "inscrits"]]
    baseline = baseline.rename(columns={"inscrits": "inscrits_base"})
    growth = mean_inscrits.merge(baseline, on="code_bv", how="left")
    delta = growth["inscrits"] - growth["inscrits_base"]
    growth["croissance_inscrits_depuis_base"] = delta / growth["inscrits_base"]

    return elections_blocs.merge(
        growth[["code_bv", "annee", "croissance_inscrits_depuis_base"]],
        on=["code_bv", "annee"],
        how="left",
    )
279
+
280
+
281
def add_lag_features(elections_blocs: pd.DataFrame) -> pd.DataFrame:
    """Add previous-election lag features per (bureau, bloc) and a centred year."""
    out = elections_blocs.sort_values(["code_bv", "bloc", "date_scrutin"])
    grouped = out.groupby(["code_bv", "bloc"])
    # One-step lags within each (bureau, bloc) time series.
    for col in ("part_bloc", "ecart_bloc_vs_national", "taux_participation_bv"):
        out[f"{col}_lag1"] = grouped[col].shift(1)
    out["annee_centre"] = out["annee"] - out["annee"].median()
    return out
292
+
293
+
294
def filter_target_communes(elections_blocs: pd.DataFrame, target_communes: Mapping[str, str]) -> pd.DataFrame:
    """
    Keep only bureaux belonging to the target communes list.
    """
    out = elections_blocs.copy()
    if "code_commune" in out.columns:
        codes = out["code_commune"].astype(str)
    else:
        # code_bv is "<commune>-<bureau>": keep the prefix, digits only,
        # normalised to exactly 5 characters.
        codes = out["code_bv"].astype(str).str.split("-").str[0]
        codes = codes.str.replace(r"\D", "", regex=True).str.zfill(5).str.slice(0, 5)
    out["code_commune"] = codes
    out["nom_commune"] = codes.map(target_communes)
    keep = out["code_commune"].isin(target_communes.keys())
    return out[keep]
307
+
308
+
309
def compute_commune_event_stats(
    elections_long: pd.DataFrame,
    target_communes: Mapping[str, str],
) -> pd.DataFrame:
    """Aggregate turnout statistics per commune and election event.

    Parameters
    ----------
    elections_long:
        Long-format results, one row per candidature and bureau.
    target_communes:
        Mapping of 5-digit commune code -> commune display name.

    Returns
    -------
    One row per (commune, type_scrutin, annee, tour, date_scrutin) with
    summed counts and derived turnout / blank / null ratios.
    """
    df = elections_long.copy()
    # Derive the commune code: either the explicit column, or the prefix of
    # code_bv ("<commune>-<bureau>") normalised to exactly 5 digits.
    if "code_commune" in df.columns:
        code_series = df["code_commune"].astype(str)
    else:
        code_series = df["code_bv"].astype(str).str.split("-").str[0]
        code_series = code_series.str.replace(r"\D", "", regex=True).str.zfill(5).str.slice(0, 5)
    df["code_commune"] = code_series
    df = df[df["code_commune"].isin(target_communes.keys())]
    df["nom_commune"] = df["code_commune"].map(target_communes)
    if "date_scrutin" in df.columns:
        df["date_scrutin"] = pd.to_datetime(df["date_scrutin"], errors="coerce")
    # Coerce count columns to numeric; missing ones are created as <NA>.
    for col in ["exprimes", "inscrits", "votants", "blancs", "nuls"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        else:
            df[col] = pd.NA

    # Counts are repeated on every candidature row of a bureau, so take the
    # max per bureau/event first, then sum bureaux within the commune.
    bv_cols = [c for c in ["code_commune", "code_bv", "type_scrutin", "annee", "tour", "date_scrutin"] if c in df.columns]
    bv_event = (
        df.groupby(bv_cols, as_index=False)
        .agg(
            exprimes=("exprimes", "max"),
            inscrits=("inscrits", "max"),
            votants=("votants", "max"),
            blancs=("blancs", "max"),
            nuls=("nuls", "max"),
        )
    )
    commune_cols = [c for c in ["code_commune", "type_scrutin", "annee", "tour", "date_scrutin"] if c in bv_event.columns]
    commune = (
        bv_event.groupby(commune_cols, as_index=False)
        .agg(
            exprimes=("exprimes", "sum"),
            inscrits=("inscrits", "sum"),
            votants=("votants", "sum"),
            blancs=("blancs", "sum"),
            nuls=("nuls", "sum"),
        )
    )
    # Zero inscrits yields <NA> ratios instead of division errors.
    base_inscrits = commune["inscrits"].replace(0, pd.NA)
    commune["turnout_pct"] = commune["votants"] / base_inscrits
    commune["blancs_pct"] = commune["blancs"] / base_inscrits
    commune["nuls_pct"] = commune["nuls"] / base_inscrits
    commune["nom_commune"] = commune["code_commune"].map(target_communes)
    return commune
358
+
359
+
360
def build_elections_blocs(
    elections_long_path: Path,
    mapping_path: Path,
    *,
    national_results_path: Optional[Path] = None,
    base_year: int = 2014,
    target_communes_path: Path = DEFAULT_COMMUNES_PATH,
) -> pd.DataFrame:
    """Build the per-bureau, per-bloc analytical table end to end."""
    blocs = expand_voix_by_bloc(
        load_elections_long(elections_long_path),
        load_bloc_mapping(mapping_path),
    )

    national_df = None
    if national_results_path and national_results_path.exists():
        if national_results_path.suffix == ".parquet":
            national_df = pd.read_parquet(national_results_path)
        else:
            national_df = pd.read_csv(national_results_path, sep=";")
    # The national reference is always attached: computed from the data
    # itself when no external file is supplied.
    blocs = attach_national_results(blocs, national_df)

    # Restrict to the target communes declared in the YAML configuration.
    blocs = filter_target_communes(blocs, load_target_communes(target_communes_path))

    blocs = compute_population_growth(blocs, base_year=base_year)
    return add_lag_features(blocs)
388
+
389
+
390
def save_processed(df: pd.DataFrame, output_dir: Path) -> None:
    """Persist the blocs table as parquet and semicolon-separated CSV."""
    output_dir.mkdir(parents=True, exist_ok=True)
    df.to_parquet(output_dir / "elections_blocs.parquet", index=False)
    df.to_csv(output_dir / "elections_blocs.csv", sep=";", index=False)
396
+
397
+
398
def save_commune_event_stats(df: pd.DataFrame, output_dir: Path) -> None:
    """Persist the commune event stats as parquet and semicolon-separated CSV."""
    output_dir.mkdir(parents=True, exist_ok=True)
    df.to_parquet(output_dir / "commune_event_stats.parquet", index=False)
    df.to_csv(output_dir / "commune_event_stats.csv", sep=";", index=False)
404
+
405
+
406
def run_full_pipeline(
    elections_long_path: Path = Path("data/interim/elections_long.parquet"),
    mapping_path: Path = Path("config/nuances.yaml"),
    output_dir: Path = Path("data/processed"),
    national_results_path: Optional[Path] = None,
    target_communes_path: Path = DEFAULT_COMMUNES_PATH,
    base_year: int = 2014,
) -> pd.DataFrame:
    """Run the full processing pipeline and persist all outputs.

    Builds the per-bureau/per-bloc table, writes it to *output_dir*, then
    derives and writes the per-commune event statistics.

    Parameters
    ----------
    base_year:
        Reference year for the inscrit-growth feature. New trailing
        parameter; defaults to the value that was previously hard-coded,
        so existing callers are unaffected.

    Returns the blocs DataFrame.
    """
    df = build_elections_blocs(
        elections_long_path=elections_long_path,
        mapping_path=mapping_path,
        national_results_path=national_results_path,
        base_year=base_year,
        target_communes_path=target_communes_path,
    )
    save_processed(df, output_dir)
    # Commune-level stats are computed from the raw long table (not the
    # bloc-expanded one) so counts are not duplicated per bloc.
    elections_long = load_elections_long(elections_long_path)
    target_communes = load_target_communes(target_communes_path)
    commune_stats = compute_commune_event_stats(elections_long, target_communes)
    save_commune_event_stats(commune_stats, output_dir)
    return df
425
+
426
+
427
# Public API of this module (names exported by ``from ... import *``).
__all__ = [
    "build_elections_blocs",
    "run_full_pipeline",
    "save_processed",
    "normalize_bloc",
    "load_target_communes",
    "compute_commune_event_stats",
    "save_commune_event_stats",
]
src/prediction.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Optional
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ import warnings
9
+
10
+ from .constants import CANDIDATE_CATEGORIES
11
+ from .pipeline import normalize_bloc
12
+
13
# numpy moved RankWarning between releases: ``np.RankWarning`` (< 2.0) and
# ``np.exceptions.RankWarning`` (>= 1.25). Try both so the warning filters
# below still suppress real RankWarnings on modern numpy; only fall back to
# a local stub when neither location exists.
try:
    from numpy import RankWarning as NP_RANK_WARNING  # type: ignore[attr-defined]
except Exception:
    try:
        from numpy.exceptions import RankWarning as NP_RANK_WARNING  # type: ignore[attr-defined]
    except Exception:
        class NP_RANK_WARNING(UserWarning):
            pass
18
+
19
+
20
@dataclass
class PredictionResult:
    """Projected outcome for a single political bloc."""
    # Bloc identifier (e.g. "centre", "gauche_modere").
    category: str
    # Projected share of expressed votes, in [0, 1].
    predicted_share: float
    # Projected absolute number of votes for this bloc.
    predicted_count: int
25
+
26
+
27
@dataclass
class PredictionSummary:
    """Full projected outcome: per-bloc results plus aggregate counts."""
    # Per-bloc projections; empty when no prediction could be made.
    bloc_predictions: list[PredictionResult]
    # Aggregate projected counts; all None when prediction failed.
    inscrits: Optional[int]
    votants: Optional[int]
    blancs: Optional[int]
    nuls: Optional[int]
    abstention: Optional[int]
    exprimes: Optional[int]
36
+
37
+
38
# Left-to-right political ordering used for display and for building
# prediction tables.
DISPLAY_BLOC_ORDER = [
    "extreme_gauche",
    "gauche_dure",
    "gauche_modere",
    "centre",
    "droite_modere",
    "droite_dure",
    "extreme_droite",
]
# Non-bloc outcome categories appended after the blocs.
EXTRA_CATEGORIES = ["blancs", "nuls", "abstention"]
48
+
49
+
50
+ def _clip01(value: float) -> float:
51
+ return float(min(1.0, max(0.0, value)))
52
+
53
+
54
+ def _last_share(df: pd.DataFrame, bloc: str, *, election: Optional[str] = None, year: Optional[int] = None) -> Optional[float]:
55
+ subset = df[df["bloc"] == bloc]
56
+ if election:
57
+ subset = subset[subset["type_scrutin"] == election]
58
+ if year is not None:
59
+ subset = subset[subset["annee"] == year]
60
+ if subset.empty:
61
+ return None
62
+ valid = subset.sort_values("date_scrutin")["part_bloc"].dropna()
63
+ if valid.empty:
64
+ return None
65
+ return valid.iloc[-1] # type: ignore[index]
66
+
67
+
68
+ def _last_value(series: pd.Series) -> Optional[float]:
69
+ series = pd.to_numeric(series, errors="coerce").dropna()
70
+ if series.empty:
71
+ return None
72
+ return float(series.iloc[-1])
73
+
74
+
75
def _project_share(series: pd.Series, years: pd.Series, target_year: int) -> Optional[float]:
    """Linearly extrapolate a vote share to *target_year*, clipped to [0, 1].

    Falls back to the last observation when fewer than two distinct years
    are available or the fit fails; None when nothing is numeric.
    """
    data = pd.DataFrame(
        {"value": pd.to_numeric(series, errors="coerce"), "year": pd.to_numeric(years, errors="coerce")}
    ).dropna()
    if data.empty:
        return None
    projected = data["value"].iloc[-1]
    if len(data["year"].unique()) >= 2 and len(data) >= 2:
        # Tiny samples can make the fit ill-conditioned: silence the rank
        # warning and fall back to the last observation on failure.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=NP_RANK_WARNING)
            try:
                slope, intercept = np.polyfit(data["year"], data["value"], 1)
                projected = slope * target_year + intercept
            except Exception:
                projected = data["value"].iloc[-1]
    return _clip01(float(projected))
92
+
93
+
94
def _project_rate(
    series: pd.Series,
    years: pd.Series,
    target_year: int,
    *,
    min_points_trend: int = 3,
    clamp_to_observed: bool = True,
) -> Optional[float]:
    """Project a participation-style rate to *target_year*.

    Fits a linear trend when at least ``min_points_trend`` distinct years
    (and rows) are available; otherwise reuses the last observed value.
    The projection is optionally clamped to the observed [min, max] range,
    then always clipped to [0, 1]. Returns None when no numeric
    observations exist.
    """
    df = pd.DataFrame(
        {"value": pd.to_numeric(series, errors="coerce"), "year": pd.to_numeric(years, errors="coerce")}
    ).dropna()
    if df.empty:
        return None
    values = df["value"].to_numpy()
    years_arr = df["year"].to_numpy()
    if len(set(years_arr)) >= min_points_trend and len(df) >= min_points_trend:
        # Suppress numpy's rank warning for poorly conditioned small fits.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=NP_RANK_WARNING)
            try:
                slope, intercept = np.polyfit(years_arr, values, 1)
                projected = slope * target_year + intercept
            except Exception:
                # Degenerate fit: fall back to the most recent observation.
                projected = values[-1]
    else:
        projected = values[-1]
    if clamp_to_observed and len(values):
        # Keep extrapolations within the historically observed range.
        projected = min(max(projected, float(np.nanmin(values))), float(np.nanmax(values)))
    return _clip01(float(projected))
122
+
123
+
124
+ def _allocate_counts(shares: list[float], total: int) -> list[int]:
125
+ if total <= 0 or not shares:
126
+ return [0 for _ in shares]
127
+ arr = np.clip(np.asarray(shares, dtype=float), 0, None)
128
+ if arr.sum() == 0:
129
+ return [0 for _ in shares]
130
+ arr = arr / arr.sum()
131
+ raw = arr * total
132
+ floors = np.floor(raw)
133
+ remainder = int(total - floors.sum())
134
+ if remainder > 0:
135
+ order = np.argsort(-(raw - floors))
136
+ for idx in order[:remainder]:
137
+ floors[idx] += 1
138
+ return floors.astype(int).tolist()
139
+
140
+
141
def compute_predictions(
    history: pd.DataFrame,
    *,
    target_election: str = "municipales",
    target_year: int = 2026,
    inscrits_override: Optional[float] = None,
) -> PredictionSummary:
    """Project bloc shares and turnout for a future election.

    Parameters
    ----------
    history:
        Bloc-level history with at least ``bloc``, ``voix_bloc``,
        ``code_bv`` and ``date_scrutin`` columns; other count columns are
        created as NaN when absent.
    target_election:
        Scrutin type to prefer when a matching history exists.
    target_year:
        Year to extrapolate trends to.
    inscrits_override:
        Optional registered-voter count to use instead of the last
        observed value.

    Returns
    -------
    A PredictionSummary; all-None fields when *history* is empty or no
    inscrit count can be determined.
    """
    if history.empty:
        return PredictionSummary([], None, None, None, None, None, None)

    df = history.copy()
    target_election = str(target_election).strip().lower()
    df["bloc"] = df["bloc"].apply(normalize_bloc)
    if "type_scrutin" in df.columns:
        df["type_scrutin"] = df["type_scrutin"].astype(str).str.strip().str.lower()
    # Coerce numeric and infer exprimes when missing from the sum of voix_bloc
    for col in ["voix_bloc", "exprimes", "inscrits", "votants", "blancs", "nuls"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
    for col in ["inscrits", "votants", "blancs", "nuls"]:
        if col not in df.columns:
            df[col] = np.nan
    if "exprimes" in df.columns:
        sum_voix = df.groupby(["code_bv", "date_scrutin"])["voix_bloc"].transform("sum")
        df["exprimes"] = df["exprimes"].fillna(sum_voix)
        df.loc[df["exprimes"] == 0, "exprimes"] = sum_voix
    if "part_bloc" not in df.columns or df["part_bloc"].isna().all():
        df["part_bloc"] = df["voix_bloc"] / df["exprimes"]
    df["part_bloc"] = pd.to_numeric(df["part_bloc"], errors="coerce").clip(upper=1)
    df = df.dropna(subset=["bloc"])

    # --- Per-bloc share projection -------------------------------------
    bloc_order = [b for b in DISPLAY_BLOC_ORDER if b in CANDIDATE_CATEGORIES]
    raw_shares: dict[str, float] = {}
    for bloc in bloc_order:
        bloc_hist = df[df["bloc"] == bloc].sort_values("date_scrutin")
        last_overall = _last_share(bloc_hist, bloc)
        base_series = bloc_hist["part_bloc"]
        base_years = bloc_hist["annee"]
        # Prefer the history of the target scrutin type when available.
        if not bloc_hist.empty and target_election in bloc_hist["type_scrutin"].values:
            base_series = bloc_hist[bloc_hist["type_scrutin"] == target_election]["part_bloc"]
            base_years = bloc_hist[bloc_hist["type_scrutin"] == target_election]["annee"]

        projected = _project_share(base_series, base_years, target_year)
        if projected is None and last_overall is not None:
            projected = last_overall
        predicted = _clip01(projected or 0.0)
        raw_shares[bloc] = predicted

    # Normalise so projected shares sum to 1 (or stay all-zero).
    share_values = np.array([raw_shares.get(b, 0.0) for b in bloc_order], dtype=float)
    share_sum = share_values.sum()
    if share_sum > 0:
        share_values = share_values / share_sum
    else:
        share_values = np.zeros_like(share_values)

    # --- Per-event turnout history -------------------------------------
    event_cols = [col for col in ["code_bv", "date_scrutin", "type_scrutin", "tour", "annee"] if col in df.columns]
    event_df = df.groupby(event_cols, as_index=False).agg(
        inscrits=("inscrits", "max"),
        votants=("votants", "max"),
        blancs=("blancs", "max"),
        nuls=("nuls", "max"),
    )
    if "date_scrutin" in event_df.columns:
        event_df = event_df.sort_values("date_scrutin")
    if "type_scrutin" not in event_df.columns:
        event_df["type_scrutin"] = ""
    if "annee" not in event_df.columns:
        if "date_scrutin" in event_df.columns:
            event_df["annee"] = pd.to_datetime(event_df["date_scrutin"], errors="coerce").dt.year
        else:
            event_df["annee"] = np.nan
    base_inscrits = event_df["inscrits"].replace(0, pd.NA)
    event_df["taux_participation"] = event_df["votants"] / base_inscrits
    event_df["taux_blancs"] = event_df["blancs"] / base_inscrits
    event_df["taux_nuls"] = event_df["nuls"] / base_inscrits

    def _select_series(col: str) -> tuple[pd.Series, pd.Series]:
        # Prefer first-round events, then the target scrutin type.
        scoped = event_df
        if "tour" in event_df.columns:
            round1 = event_df[event_df["tour"] == 1]
            if not round1.empty:
                scoped = round1
        if not scoped.empty and target_election in scoped["type_scrutin"].values:
            mask = scoped["type_scrutin"] == target_election
            return scoped.loc[mask, col], scoped.loc[mask, "annee"]
        return scoped[col], scoped["annee"]

    turnout_series, turnout_years = _select_series("taux_participation")
    blancs_series, blancs_years = _select_series("taux_blancs")
    nuls_series, nuls_years = _select_series("taux_nuls")

    taux_participation = _project_rate(turnout_series, turnout_years, target_year)
    taux_blancs = _project_rate(blancs_series, blancs_years, target_year)
    taux_nuls = _project_rate(nuls_series, nuls_years, target_year)

    # --- Registered-voter base -----------------------------------------
    inscrits_used = None
    if inscrits_override is not None:
        try:
            value = float(inscrits_override)
            if value > 0:
                inscrits_used = value
        except (TypeError, ValueError):
            inscrits_used = None
    if inscrits_used is None:
        inscrits_used = _last_value(event_df["inscrits"])
    if inscrits_used is None:
        return PredictionSummary([], None, None, None, None, None, None)

    if taux_participation is None:
        taux_participation = 0.0
    if taux_blancs is None:
        taux_blancs = 0.0
    if taux_nuls is None:
        taux_nuls = 0.0

    # Blancs + nuls cannot exceed participation: rescale proportionally.
    if taux_blancs + taux_nuls > taux_participation and (taux_blancs + taux_nuls) > 0:
        scale = taux_participation / (taux_blancs + taux_nuls)
        taux_blancs *= scale
        taux_nuls *= scale

    # --- Integer totals -------------------------------------------------
    inscrits_total = int(round(inscrits_used))
    votants_total = int(round(inscrits_total * taux_participation))
    blancs_total = int(round(inscrits_total * taux_blancs))
    nuls_total = int(round(inscrits_total * taux_nuls))
    # Re-check after rounding: keep blancs + nuls within votants.
    if blancs_total + nuls_total > votants_total and (blancs_total + nuls_total) > 0:
        scale = votants_total / (blancs_total + nuls_total)
        blancs_total = int(round(blancs_total * scale))
        nuls_total = int(round(nuls_total * scale))
    exprimes_total = max(0, votants_total - blancs_total - nuls_total)
    abstention_total = max(0, inscrits_total - votants_total)

    bloc_counts = _allocate_counts(share_values.tolist(), exprimes_total)
    bloc_predictions: list[PredictionResult] = []
    for bloc, share, count in zip(bloc_order, share_values.tolist(), bloc_counts):
        bloc_predictions.append(
            PredictionResult(
                category=bloc,
                predicted_share=float(share),
                predicted_count=int(count),
            )
        )

    return PredictionSummary(
        bloc_predictions=bloc_predictions,
        inscrits=inscrits_total,
        votants=votants_total,
        blancs=blancs_total,
        nuls=nuls_total,
        abstention=abstention_total,
        exprimes=exprimes_total,
    )
292
+
293
+
294
def predictions_as_dataframe(summary: PredictionSummary) -> pd.DataFrame:
    """Flatten a PredictionSummary into a (categorie, nombre) table."""
    if summary is None or not summary.bloc_predictions:
        return pd.DataFrame(columns=["categorie", "nombre"])
    by_bloc = {item.category: item for item in summary.bloc_predictions}
    # Blocs first, in canonical display order.
    rows = [
        {"categorie": bloc, "nombre": int(by_bloc[bloc].predicted_count)}
        for bloc in DISPLAY_BLOC_ORDER
        if bloc in by_bloc
    ]
    # Then the non-bloc categories, when known.
    for label, value in (
        ("blancs", summary.blancs),
        ("nuls", summary.nuls),
        ("abstention", summary.abstention),
    ):
        if value is not None:
            rows.append({"categorie": label, "nombre": int(value)})
    return pd.DataFrame(rows)
309
+
310
+
311
+ __all__ = ["compute_predictions", "predictions_as_dataframe", "PredictionResult", "PredictionSummary"]