Sync from GitHub Actions
Browse files- .env.example +7 -0
- .gitignore +200 -0
- README.md +337 -1
- app.py +20 -5
- app/__init__.py +3 -0
- app/app.py +328 -0
- app/gradio_app.py +1645 -0
- config/communes.yaml +11 -0
- config/nuances.yaml +14 -0
- config/raw_sources.yaml +344 -0
- data/geo/bdv_s_te.geojson +0 -0
- data/geo/bdv_s_te.kml +1762 -0
- data/interim/elections_long.parquet +3 -0
- data/mapping_candidats_blocs.csv +61 -0
- data/mappings/category_mapping.csv +39 -0
- docker-compose.yml +38 -0
- harmoniser.md +19 -0
- main.py +117 -0
- mission.md +410 -0
- models/best_model.json +3 -0
- models/feature_columns.json +40 -0
- models/hist_gradient_boosting.joblib +3 -0
- models/model_card.md +8 -0
- requirements.txt +15 -0
- src/__init__.py +1 -0
- src/constants.py +35 -0
- src/data/__init__.py +3 -0
- src/data/preprocess.py +481 -0
- src/data_prep.py +418 -0
- src/database.py +153 -0
- src/db/__init__.py +3 -0
- src/db/ingest.py +241 -0
- src/db/schema.py +95 -0
- src/features/__init__.py +3 -0
- src/features/build_features.py +570 -0
- src/model/predict.py +201 -0
- src/model/train.py +666 -0
- src/pipeline.py +435 -0
- src/prediction.py +311 -0
.env.example
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
POSTGRES_USER=sete_admin
|
| 2 |
+
POSTGRES_PASSWORD=sete_password
|
| 3 |
+
POSTGRES_DB=elections
|
| 4 |
+
POSTGRES_PORT=5432
|
| 5 |
+
POSTGRES_HOST=localhost
|
| 6 |
+
# Option directe si vous préférez définir l'URL complète :
|
| 7 |
+
# DATABASE_URL=postgresql+psycopg2://sete_admin:sete_password@localhost:5432/elections
|
.gitignore
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Spécifique à ce projet
|
| 2 |
+
.DS_Store
|
| 3 |
+
*.code-workspace
|
| 4 |
+
*.pdf
|
| 5 |
+
/output/
|
| 6 |
+
questions.md
|
| 7 |
+
/reports/
|
| 8 |
+
/data/external/
|
| 9 |
+
/data/raw/
|
| 10 |
+
/datasets/
|
| 11 |
+
/data/processed/
|
| 12 |
+
/data/contours-france-entiere-latest-v2.geojson
|
| 13 |
+
data/interim/*
|
| 14 |
+
!data/interim/elections_long.parquet
|
| 15 |
+
runtime.txt
|
| 16 |
+
/logs/
|
| 17 |
+
.vscode
|
| 18 |
+
supports/
|
| 19 |
+
# Hugging Face
|
| 20 |
+
.hf/
|
| 21 |
+
.huggingface/
|
| 22 |
+
# vim
|
| 23 |
+
*.swp
|
| 24 |
+
*.swo
|
| 25 |
+
|
| 26 |
+
## https://github.com/github/gitignore/blob/e8554d85bf62e38d6db966a50d2064ac025fd82a/Python.gitignore
|
| 27 |
+
|
| 28 |
+
# Byte-compiled / optimized / DLL files
|
| 29 |
+
__pycache__/
|
| 30 |
+
*.py[cod]
|
| 31 |
+
*$py.class
|
| 32 |
+
|
| 33 |
+
# C extensions
|
| 34 |
+
*.so
|
| 35 |
+
|
| 36 |
+
# Distribution / packaging
|
| 37 |
+
.Python
|
| 38 |
+
build/
|
| 39 |
+
develop-eggs/
|
| 40 |
+
dist/
|
| 41 |
+
downloads/
|
| 42 |
+
eggs/
|
| 43 |
+
.eggs/
|
| 44 |
+
lib/
|
| 45 |
+
lib64/
|
| 46 |
+
parts/
|
| 47 |
+
sdist/
|
| 48 |
+
var/
|
| 49 |
+
wheels/
|
| 50 |
+
share/python-wheels/
|
| 51 |
+
*.egg-info/
|
| 52 |
+
.installed.cfg
|
| 53 |
+
*.egg
|
| 54 |
+
MANIFEST
|
| 55 |
+
|
| 56 |
+
# PyInstaller
|
| 57 |
+
# Usually these files are written by a python script from a template
|
| 58 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 59 |
+
*.manifest
|
| 60 |
+
*.spec
|
| 61 |
+
|
| 62 |
+
# Installer logs
|
| 63 |
+
pip-log.txt
|
| 64 |
+
pip-delete-this-directory.txt
|
| 65 |
+
|
| 66 |
+
# Unit test / coverage reports
|
| 67 |
+
htmlcov/
|
| 68 |
+
.tox/
|
| 69 |
+
.nox/
|
| 70 |
+
.coverage
|
| 71 |
+
.coverage.*
|
| 72 |
+
.cache
|
| 73 |
+
nosetests.xml
|
| 74 |
+
coverage.xml
|
| 75 |
+
*.cover
|
| 76 |
+
*.py,cover
|
| 77 |
+
.hypothesis/
|
| 78 |
+
.pytest_cache/
|
| 79 |
+
cover/
|
| 80 |
+
|
| 81 |
+
# Translations
|
| 82 |
+
*.mo
|
| 83 |
+
*.pot
|
| 84 |
+
|
| 85 |
+
# Django stuff:
|
| 86 |
+
*.log
|
| 87 |
+
local_settings.py
|
| 88 |
+
db.sqlite3
|
| 89 |
+
db.sqlite3-journal
|
| 90 |
+
|
| 91 |
+
# Flask stuff:
|
| 92 |
+
instance/
|
| 93 |
+
.webassets-cache
|
| 94 |
+
|
| 95 |
+
# Scrapy stuff:
|
| 96 |
+
.scrapy
|
| 97 |
+
|
| 98 |
+
# PyBuilder
|
| 99 |
+
.pybuilder/
|
| 100 |
+
target/
|
| 101 |
+
|
| 102 |
+
# Jupyter Notebook
|
| 103 |
+
.ipynb_checkpoints
|
| 104 |
+
|
| 105 |
+
# IPython
|
| 106 |
+
profile_default/
|
| 107 |
+
ipython_config.py
|
| 108 |
+
|
| 109 |
+
# pyenv
|
| 110 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 111 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 112 |
+
# .python-version
|
| 113 |
+
|
| 114 |
+
# pipenv
|
| 115 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 116 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 117 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 118 |
+
# install all needed dependencies.
|
| 119 |
+
#Pipfile.lock
|
| 120 |
+
|
| 121 |
+
# UV
|
| 122 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 123 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 124 |
+
# commonly ignored for libraries.
|
| 125 |
+
#uv.lock
|
| 126 |
+
|
| 127 |
+
# poetry
|
| 128 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 129 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 130 |
+
# commonly ignored for libraries.
|
| 131 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 132 |
+
#poetry.lock
|
| 133 |
+
|
| 134 |
+
# pdm
|
| 135 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 136 |
+
#pdm.lock
|
| 137 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 138 |
+
# in version control.
|
| 139 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
| 140 |
+
.pdm.toml
|
| 141 |
+
.pdm-python
|
| 142 |
+
.pdm-build/
|
| 143 |
+
|
| 144 |
+
# pixi
|
| 145 |
+
# pixi.lock should be committed to version control for reproducibility
|
| 146 |
+
# .pixi/ contains the environments and should not be committed
|
| 147 |
+
.pixi/
|
| 148 |
+
|
| 149 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 150 |
+
__pypackages__/
|
| 151 |
+
|
| 152 |
+
# Celery stuff
|
| 153 |
+
celerybeat-schedule
|
| 154 |
+
celerybeat.pid
|
| 155 |
+
|
| 156 |
+
# SageMath parsed files
|
| 157 |
+
*.sage.py
|
| 158 |
+
|
| 159 |
+
# Environments
|
| 160 |
+
.env
|
| 161 |
+
.venv
|
| 162 |
+
env/
|
| 163 |
+
venv/
|
| 164 |
+
ENV/
|
| 165 |
+
env.bak/
|
| 166 |
+
venv.bak/
|
| 167 |
+
|
| 168 |
+
# Spyder project settings
|
| 169 |
+
.spyderproject
|
| 170 |
+
.spyproject
|
| 171 |
+
|
| 172 |
+
# Rope project settings
|
| 173 |
+
.ropeproject
|
| 174 |
+
|
| 175 |
+
# mypy
|
| 176 |
+
.mypy_cache/
|
| 177 |
+
.dmypy.json
|
| 178 |
+
dmypy.json
|
| 179 |
+
|
| 180 |
+
# Pyre type checker
|
| 181 |
+
.pyre/
|
| 182 |
+
|
| 183 |
+
# pytype static type analyzer
|
| 184 |
+
.pytype/
|
| 185 |
+
|
| 186 |
+
# Cython debug symbols
|
| 187 |
+
cython_debug/
|
| 188 |
+
|
| 189 |
+
# PyCharm
|
| 190 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 191 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 192 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 193 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 194 |
+
#.idea/
|
| 195 |
+
|
| 196 |
+
# Ruff stuff:
|
| 197 |
+
.ruff_cache/
|
| 198 |
+
|
| 199 |
+
# PyPI configuration file
|
| 200 |
+
.pypirc
|
README.md
CHANGED
|
@@ -9,4 +9,340 @@ app_file: app.py
|
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
pinned: false
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Elections Sète - Prévision municipales
|
| 13 |
+
|
| 14 |
+
Pipeline complet pour harmoniser les données électorales, construire un dataset panel sans fuite temporelle, entraîner des modèles multi-blocs, charger l'historique dans PostgreSQL et exposer des résultats via Gradio.
|
| 15 |
+
|
| 16 |
+
## Installation
|
| 17 |
+
- Python 3.10+ recommandé.
|
| 18 |
+
- `python3 -m venv .venv && source .venv/bin/activate`
|
| 19 |
+
- `pip install -r requirements.txt`
|
| 20 |
+
|
| 21 |
+
## Fichiers YAML (configuration)
|
| 22 |
+
### `config/communes.yaml`
|
| 23 |
+
Ce fichier définit **les communes à inclure** (codes INSEE). Il est consommé par le pipeline (`src.pipeline.run_full_pipeline`) pour filtrer les données au niveau commune.
|
| 24 |
+
|
| 25 |
+
Formats acceptés (les codes sont normalisés en 5 chiffres) :
|
| 26 |
+
```yaml
|
| 27 |
+
communes:
|
| 28 |
+
"34301": "Sète"
|
| 29 |
+
"34172": "Frontignan"
|
| 30 |
+
```
|
| 31 |
+
ou
|
| 32 |
+
```yaml
|
| 33 |
+
communes:
|
| 34 |
+
- code_insee: "34301"
|
| 35 |
+
nom: "Sète"
|
| 36 |
+
- "34172"
|
| 37 |
+
```
|
| 38 |
+
Si tu modifies ce fichier, il faut **relancer le pipeline** pour régénérer les données filtrées.
|
| 39 |
+
|
| 40 |
+
### `config/raw_sources.yaml`
|
| 41 |
+
Description des fichiers bruts et de leur structure (colonnes, séparateur, métadonnées).
|
| 42 |
+
C'est **le point d'entrée** pour ajouter un nouveau CSV au pipeline.
|
| 43 |
+
|
| 44 |
+
Exemple (copie d'une élection précédente + ajustements) :
|
| 45 |
+
```yaml
|
| 46 |
+
24_L_T1.csv:
|
| 47 |
+
copy_from: 22_L_T1.csv
|
| 48 |
+
date_scrutin: "2024-06-30"
|
| 49 |
+
code_bv_cols: ["Code commune", "Code BV"]
|
| 50 |
+
rename_map:
|
| 51 |
+
Nuance Liste: code_candidature
|
| 52 |
+
Libellé Abrégé Liste: nom_candidature
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
### `config/nuances.yaml`
|
| 56 |
+
Mapping des nuances vers les blocs politiques (avec overrides).
|
| 57 |
+
Par défaut, le mapping CSV historique est réutilisé et on peut **surcharger** ou **ajouter** des nuances :
|
| 58 |
+
```yaml
|
| 59 |
+
base_mapping: data/mapping_candidats_blocs.csv
|
| 60 |
+
overrides:
|
| 61 |
+
- code_candidature: "XYZ"
|
| 62 |
+
nom_candidature: "Exemple"
|
| 63 |
+
blocs: [gauche_modere, centre]
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
### `docker-compose.yml`
|
| 67 |
+
Fichier YAML pour démarrer PostgreSQL (et éventuellement pgAdmin). Utilisé par :
|
| 68 |
+
```bash
|
| 69 |
+
docker-compose up -d postgres
|
| 70 |
+
docker-compose --profile admin up
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
## 1. Prétraitement (harmonisation)
|
| 74 |
+
```bash
|
| 75 |
+
# Harmonisation des CSV bruts -> data/interim/elections_long.parquet
|
| 76 |
+
python -m src.data.preprocess --raw-dir data/raw --output-dir data/interim
|
| 77 |
+
```
|
| 78 |
+
Par défaut, le prétraitement lit `config/raw_sources.yaml`. Tu peux surcharger via `--meta-config`.
|
| 79 |
+
|
| 80 |
+
## 2. Pipeline communes + features (optionnel mais recommandé si tu filtres par communes)
|
| 81 |
+
Le pipeline applique le filtre `config/communes.yaml` et génère `data/processed/elections_blocs.*`.
|
| 82 |
+
À lancer depuis un notebook ou un petit script :
|
| 83 |
+
```bash
|
| 84 |
+
python3 - <<'PY'
|
| 85 |
+
from pathlib import Path
|
| 86 |
+
from src.pipeline import run_full_pipeline
|
| 87 |
+
|
| 88 |
+
run_full_pipeline(
|
| 89 |
+
elections_long_path=Path("data/interim/elections_long.parquet"),
|
| 90 |
+
mapping_path=Path("config/nuances.yaml"),
|
| 91 |
+
output_dir=Path("data/processed"),
|
| 92 |
+
target_communes_path=Path("config/communes.yaml"),
|
| 93 |
+
)
|
| 94 |
+
PY
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
## 3. Construction du panel (features + cibles)
|
| 98 |
+
```bash
|
| 99 |
+
python -m src.features.build_features \
|
| 100 |
+
--elections-long data/interim/elections_long.parquet \
|
| 101 |
+
--mapping config/nuances.yaml \
|
| 102 |
+
--output data/processed/panel.parquet
|
| 103 |
+
```
|
| 104 |
+
Le dictionnaire de données est généré dans `data/processed/data_dictionary.md`.
|
| 105 |
+
|
| 106 |
+
Note : `src.features.build_features` **ne filtre pas** via `config/communes.yaml`. Si tu veux limiter l'entraînement à certaines communes, filtre `elections_long` en amont ou adapte le pipeline.
|
| 107 |
+
|
| 108 |
+
## 4. Base PostgreSQL
|
| 109 |
+
```bash
|
| 110 |
+
cp .env.example .env
|
| 111 |
+
docker-compose up -d postgres # pgAdmin en option: `docker-compose --profile admin up`
|
| 112 |
+
|
| 113 |
+
# Ingestion du panel dans le schéma normalisé
|
| 114 |
+
python -m src.db.ingest --input data/processed/panel.parquet
|
| 115 |
+
```
|
| 116 |
+
Le schéma est défini dans `src/db/schema.py`.
|
| 117 |
+
|
| 118 |
+
## 5. Entraînement & évaluation
|
| 119 |
+
Commande demandée (CV stricte par scrutin) :
|
| 120 |
+
```bash
|
| 121 |
+
python3 -m src.model.train --cv-splits 4 --models hist_gradient_boosting
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
Options principales :
|
| 125 |
+
- `--panel` : chemin du panel (`data/processed/panel.parquet` par défaut).
|
| 126 |
+
- `--models-dir` / `--reports-dir` : sorties modèles et rapports.
|
| 127 |
+
- `--train-end-year`, `--valid-end-year`, `--test-start-year` : split temporel.
|
| 128 |
+
- `--cv-splits` : nb de folds temporels (par scrutin).
|
| 129 |
+
- `--no-tune` : désactive la grille d'hyperparamètres.
|
| 130 |
+
- `--max-trials` : limite le nombre d'essais par modèle.
|
| 131 |
+
- `--models` : liste de modèles à tester (ex: `ridge`, `hist_gradient_boosting`, `lightgbm`, `xgboost`, `two_stage_hgb`, `catboost`).
|
| 132 |
+
|
| 133 |
+
Sorties :
|
| 134 |
+
- Modèle + preprocessor : `models/<nom>.joblib` et `models/feature_columns.json`
|
| 135 |
+
- Modèle sélectionné : `models/best_model.json`
|
| 136 |
+
- Rapport métriques : `reports/metrics.json` et `reports/metrics.md`
|
| 137 |
+
- CV détaillée : `reports/cv_summary.csv`
|
| 138 |
+
- Figure : `reports/figures/mae_per_category.png`
|
| 139 |
+
- Model card : `models/model_card.md`
|
| 140 |
+
|
| 141 |
+
## 6. Génération de prédictions hors ligne
|
| 142 |
+
```bash
|
| 143 |
+
python -m src.model.predict \
|
| 144 |
+
--model-path models/hist_gradient_boosting.joblib \
|
| 145 |
+
--target-election-type municipales \
|
| 146 |
+
--target-year 2026 \
|
| 147 |
+
--commune-code 34301
|
| 148 |
+
# -> predictions/pred_municipales_2026_sete.csv
|
| 149 |
+
```
|
| 150 |
+
Cette commande produit des **parts (%)** et des deltas vs législatives et municipales 2020.
|
| 151 |
+
|
| 152 |
+
## 7. Application Gradio
|
| 153 |
+
```bash
|
| 154 |
+
python -m app.gradio_app
|
| 155 |
+
```
|
| 156 |
+
Comportement :
|
| 157 |
+
- Backend PostgreSQL si disponible, sinon fallback fichiers locaux.
|
| 158 |
+
- **Historique** : consultation bureau par bureau (pas de ML).
|
| 159 |
+
- **Prédiction** : parts par bloc converties en **comptes** (personnes) + `blancs`, `nuls`, `abstentions`.
|
| 160 |
+
- `inscrits` peut être fourni par l'utilisateur (sinon valeur historique la plus récente du bureau).
|
| 161 |
+
- Cibles proposées : municipales 2026 (tour 1), législatives 2027 (tour 1), présidentielles 2027 (tour 1).
|
| 162 |
+
|
| 163 |
+
## Structure des données
|
| 164 |
+
- Configurations : `config/`
|
| 165 |
+
- Bruts : `data/raw/`
|
| 166 |
+
- Long harmonisé : `data/interim/elections_long.parquet`
|
| 167 |
+
- Élections blocs (filtrées) : `data/processed/elections_blocs.parquet`
|
| 168 |
+
- Stats communales par scrutin : `data/processed/commune_event_stats.parquet`
|
| 169 |
+
- Panel features+cibles : `data/processed/panel.parquet`
|
| 170 |
+
- Mapping nuances -> catégories : `config/nuances.yaml` (base: `data/mapping_candidats_blocs.csv`)
|
| 171 |
+
|
| 172 |
+
## Notes
|
| 173 |
+
- Aucune fuite temporelle : les features sont calculées uniquement sur des scrutins strictement antérieurs à la cible.
|
| 174 |
+
- Les parts sont clipées à [0, 1] puis renormalisées.
|
| 175 |
+
- Les blancs/nuls dépendent des colonnes disponibles dans l'historique ; si une source ne les fournit pas, ils seront à 0.
|
| 176 |
+
|
| 177 |
+
## Inventaire des fichiers (snapshot)
|
| 178 |
+
Statuts :
|
| 179 |
+
- `actif` : utilisé par le pipeline actuel.
|
| 180 |
+
- `généré` : produit par le pipeline/entraînement (recréable).
|
| 181 |
+
- `hérité (début projet)` : ancien fichier ou prototype.
|
| 182 |
+
- `optionnel` : utile mais non requis au runtime.
|
| 183 |
+
- `système (inutile)` : métadonnées OS.
|
| 184 |
+
|
| 185 |
+
| Fichier | Fonction | Statut |
|
| 186 |
+
|---|---|---|
|
| 187 |
+
| `.DS_Store` | Métadonnées macOS | système (inutile) |
|
| 188 |
+
| `.env.example` | Template des variables d'environnement (DB) | actif |
|
| 189 |
+
| `.gitignore` | Règles gitignore | actif |
|
| 190 |
+
| `Elections_Sete.code-workspace` | Config VSCode (workspace) | optionnel |
|
| 191 |
+
| `README.md` | Documentation projet | actif |
|
| 192 |
+
| `app/__init__.py` | Package app (init) | actif |
|
| 193 |
+
| `app/app.py` | Ancienne app Gradio (bv_features.parquet) | hérité (début projet) |
|
| 194 |
+
| `app/gradio_app.py` | Application Gradio principale | actif |
|
| 195 |
+
| `app.py` | Ancienne interface Gradio (compute_predictions) | hérité (début projet) |
|
| 196 |
+
| `catboost_info/catboost_training.json` | Artefacts CatBoost (logs/metrics) | généré |
|
| 197 |
+
| `catboost_info/learn/events.out.tfevents` | Artefacts CatBoost (logs/metrics) | généré |
|
| 198 |
+
| `catboost_info/learn_error.tsv` | Artefacts CatBoost (logs/metrics) | généré |
|
| 199 |
+
| `catboost_info/time_left.tsv` | Artefacts CatBoost (logs/metrics) | généré |
|
| 200 |
+
| `config/communes.yaml` | Liste des communes cibles (codes INSEE) | actif |
|
| 201 |
+
| `config/nuances.yaml` | Overrides mapping nuances -> blocs | actif |
|
| 202 |
+
| `config/raw_sources.yaml` | Schéma des CSV bruts (meta-config) | actif |
|
| 203 |
+
| `data/.DS_Store` | Métadonnées macOS | système (inutile) |
|
| 204 |
+
| `data/contours-france-entiere-latest-v2.geojson` | Fond cartographique (geojson) | optionnel |
|
| 205 |
+
| `data/interim/.DS_Store` | Métadonnées macOS | système (inutile) |
|
| 206 |
+
| `data/interim/candidates_long.parquet` | Données intermédiaires long format | généré |
|
| 207 |
+
| `data/interim/elections_long.csv` | Données intermédiaires long format | généré |
|
| 208 |
+
| `data/interim/elections_long.parquet` | Données intermédiaires long format | généré |
|
| 209 |
+
| `data/interim/frames_std/14_EU.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 210 |
+
| `data/interim/frames_std/14_MN14_T1T2.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 211 |
+
| `data/interim/frames_std/17_L_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 212 |
+
| `data/interim/frames_std/17_L_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 213 |
+
| `data/interim/frames_std/17_PR_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 214 |
+
| `data/interim/frames_std/17_PR_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 215 |
+
| `data/interim/frames_std/19_EU.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 216 |
+
| `data/interim/frames_std/20_MN_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 217 |
+
| `data/interim/frames_std/20_MN_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 218 |
+
| `data/interim/frames_std/21_DEP_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 219 |
+
| `data/interim/frames_std/21_DEP_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 220 |
+
| `data/interim/frames_std/21_REG_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 221 |
+
| `data/interim/frames_std/21_REG_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 222 |
+
| `data/interim/frames_std/22_L_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 223 |
+
| `data/interim/frames_std/22_L_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 224 |
+
| `data/interim/frames_std/22_PR_T1.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 225 |
+
| `data/interim/frames_std/22_PR_T2.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 226 |
+
| `data/interim/frames_std/24_EU.parquet` | Intermédiaire standardisé par scrutin | généré |
|
| 227 |
+
| `data/interim/harmonized/14_EU_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 228 |
+
| `data/interim/harmonized/14_MN14_T1T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 229 |
+
| `data/interim/harmonized/17_L_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 230 |
+
| `data/interim/harmonized/17_L_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 231 |
+
| `data/interim/harmonized/17_PR_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 232 |
+
| `data/interim/harmonized/17_PR_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 233 |
+
| `data/interim/harmonized/19_EU_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 234 |
+
| `data/interim/harmonized/20_MN_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 235 |
+
| `data/interim/harmonized/20_MN_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 236 |
+
| `data/interim/harmonized/21_DEP_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 237 |
+
| `data/interim/harmonized/21_DEP_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 238 |
+
| `data/interim/harmonized/21_REG_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 239 |
+
| `data/interim/harmonized/21_REG_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 240 |
+
| `data/interim/harmonized/22_L_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 241 |
+
| `data/interim/harmonized/22_L_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 242 |
+
| `data/interim/harmonized/22_PR_T1_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 243 |
+
| `data/interim/harmonized/22_PR_T2_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 244 |
+
| `data/interim/harmonized/24_EU_harmonized.csv` | CSV harmonisé par scrutin | généré |
|
| 245 |
+
| `data/interim/unmapped_nuances.csv` | Données intermédiaires long format | généré |
|
| 246 |
+
| `data/mapping_candidats_blocs.csv` | Mapping nuances -> blocs (base) | actif |
|
| 247 |
+
| `data/mappings/category_mapping.csv` | Copie/variante de mapping | hérité (début projet) |
|
| 248 |
+
| `data/processed/bv_features.parquet` | Features legacy (utilisées par app/app.py) | hérité (début projet) |
|
| 249 |
+
| `data/processed/data_dictionary.md` | Dictionnaire de données généré | généré (doc) |
|
| 250 |
+
| `data/processed/elections_blocs.csv` | Dataset blocs (filtré communes) | généré (utilisé) |
|
| 251 |
+
| `data/processed/elections_blocs.parquet` | Dataset blocs (filtré communes) | généré (utilisé) |
|
| 252 |
+
| `data/processed/history_cache.parquet` | Cache local (historique/prédictions) | généré (cache) |
|
| 253 |
+
| `data/processed/panel.csv` | Panel features+cibles | généré (utilisé) |
|
| 254 |
+
| `data/processed/panel.parquet` | Panel features+cibles | généré (utilisé) |
|
| 255 |
+
| `data/processed/predictions_cache.parquet` | Cache local (historique/prédictions) | généré (cache) |
|
| 256 |
+
| `data/processed/predictions_municipales_2026.csv` | Exports de prédictions | généré (résultats) |
|
| 257 |
+
| `data/processed/predictions_municipales_2026_blocs.csv` | Exports de prédictions | généré (résultats) |
|
| 258 |
+
| `data/processed/predictions_municipales_sete_2026.csv` | Exports de prédictions | généré (résultats) |
|
| 259 |
+
| `data/raw/14_EU.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 260 |
+
| `data/raw/14_MN14_T1T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 261 |
+
| `data/raw/17_L_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 262 |
+
| `data/raw/17_L_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 263 |
+
| `data/raw/17_PR_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 264 |
+
| `data/raw/17_PR_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 265 |
+
| `data/raw/19_EU.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 266 |
+
| `data/raw/20_MN_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 267 |
+
| `data/raw/20_MN_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 268 |
+
| `data/raw/21_DEP_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 269 |
+
| `data/raw/21_DEP_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 270 |
+
| `data/raw/21_REG_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 271 |
+
| `data/raw/21_REG_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 272 |
+
| `data/raw/22_L_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 273 |
+
| `data/raw/22_L_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 274 |
+
| `data/raw/22_PR_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 275 |
+
| `data/raw/22_PR_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 276 |
+
| `data/raw/24_EU.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 277 |
+
| `data/raw/24_L_T1.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 278 |
+
| `data/raw/24_L_T2.csv` | Données brutes (entrée prétraitement) | actif (entrée pipeline) |
|
| 279 |
+
| `datasets/.DS_Store` | Métadonnées macOS | système (inutile) |
|
| 280 |
+
| `datasets/14_EU.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 281 |
+
| `datasets/14_MN14_T1T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 282 |
+
| `datasets/17_L_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 283 |
+
| `datasets/17_L_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 284 |
+
| `datasets/17_PR_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 285 |
+
| `datasets/17_PR_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 286 |
+
| `datasets/19_EU.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 287 |
+
| `datasets/20_MN_T1.tsv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 288 |
+
| `datasets/20_MN_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 289 |
+
| `datasets/21_DEP_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 290 |
+
| `datasets/21_DEP_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 291 |
+
| `datasets/21_REG_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 292 |
+
| `datasets/21_REG_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 293 |
+
| `datasets/22_L_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 294 |
+
| `datasets/22_L_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 295 |
+
| `datasets/22_PR_T1.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 296 |
+
| `datasets/22_PR_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 297 |
+
| `datasets/24_EU.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 298 |
+
| `datasets/24_L_T1T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 299 |
+
| `datasets/24_L_T2.csv` | Copie brute des datasets (ancienne structure) | hérité (début projet) |
|
| 300 |
+
| `docker-compose.yml` | Services Docker (PostgreSQL/pgAdmin) | actif |
|
| 301 |
+
| `harmoniser.md` | Notes d'harmonisation | optionnel |
|
| 302 |
+
| `main.py` | Orchestrateur pipeline (CLI utilitaire) | optionnel |
|
| 303 |
+
| `mission.md` | Backlog / notes projet | optionnel |
|
| 304 |
+
| `models/best_model.json` | Nom du meilleur modèle | généré (utilisé) |
|
| 305 |
+
| `models/feature_columns.json` | Liste des features du modèle | généré (utilisé) |
|
| 306 |
+
| `models/hist_gradient_boosting.joblib` | Modèle entraîné | généré (utilisé) |
|
| 307 |
+
| `models/model_card.md` | Model card (synthèse) | généré (doc) |
|
| 308 |
+
| `notebooks/01_pretraitement.ipynb` | Notebook d'analyse / exploration | optionnel (exploration) |
|
| 309 |
+
| `notebooks/02_feature_engineering.ipynb` | Notebook d'analyse / exploration | optionnel (exploration) |
|
| 310 |
+
| `notebooks/03_modelisation_prediction.ipynb` | Notebook d'analyse / exploration | optionnel (exploration) |
|
| 311 |
+
| `notebooks/aed.ipynb` | Notebook d'analyse / exploration | optionnel (exploration) |
|
| 312 |
+
| `notebooks/catboost_info/catboost_training.json` | Artefacts CatBoost (notebook) | généré |
|
| 313 |
+
| `notebooks/catboost_info/learn/events.out.tfevents` | Artefacts CatBoost (notebook) | généré |
|
| 314 |
+
| `notebooks/catboost_info/learn_error.tsv` | Artefacts CatBoost (notebook) | généré |
|
| 315 |
+
| `notebooks/catboost_info/time_left.tsv` | Artefacts CatBoost (notebook) | généré |
|
| 316 |
+
| `output/.DS_Store` | Métadonnées macOS | système (inutile) |
|
| 317 |
+
| `output/Sans titre 2.png` | Exports graphiques | hérité (début projet) |
|
| 318 |
+
| `output/Sans titre 3.png` | Exports graphiques | hérité (début projet) |
|
| 319 |
+
| `output/Sans titre 4.png` | Exports graphiques | hérité (début projet) |
|
| 320 |
+
| `output/Sans titre 5.png` | Exports graphiques | hérité (début projet) |
|
| 321 |
+
| `output/Sans titre 6.png` | Exports graphiques | hérité (début projet) |
|
| 322 |
+
| `output/Sans titre.png` | Exports graphiques | hérité (début projet) |
|
| 323 |
+
| `output/output.png` | Exports graphiques | hérité (début projet) |
|
| 324 |
+
| `predictions/pred_municipales_2026_sete.csv` | Exports de prédictions | généré (résultats) |
|
| 325 |
+
| `reports/colonnes_comparatif.csv` | Rapport / métriques | généré |
|
| 326 |
+
| `reports/cv_summary.csv` | Rapport / métriques | généré |
|
| 327 |
+
| `reports/figures/mae_per_category.png` | Figures de rapports | généré |
|
| 328 |
+
| `reports/metrics.json` | Rapport / métriques | généré |
|
| 329 |
+
| `reports/metrics.md` | Rapport / note analytique | généré (doc) |
|
| 330 |
+
| `reports/notebook_audit.md` | Rapport / note analytique | généré (doc) |
|
| 331 |
+
| `requirements.txt` | Dépendances Python | actif |
|
| 332 |
+
| `src/__init__.py` | Package src (init) | actif |
|
| 333 |
+
| `src/constants.py` | Constantes projet | actif |
|
| 334 |
+
| `src/data/__init__.py` | Module data | actif |
|
| 335 |
+
| `src/data/preprocess.py` | Prétraitement/harmonisation | actif |
|
| 336 |
+
| `src/data_prep.py` | Librairie d'harmonisation des données | actif |
|
| 337 |
+
| `src/database.py` | Accès base SQL (fallback/app) | actif |
|
| 338 |
+
| `src/db/__init__.py` | Module DB | actif |
|
| 339 |
+
| `src/db/ingest.py` | Ingestion PostgreSQL | actif |
|
| 340 |
+
| `src/db/schema.py` | Schéma PostgreSQL | actif |
|
| 341 |
+
| `src/features/__init__.py` | Module features | actif |
|
| 342 |
+
| `src/features/build_features.py` | Construction du panel features+cibles | actif |
|
| 343 |
+
| `src/model/predict.py` | Prédiction hors ligne | actif |
|
| 344 |
+
| `src/model/train.py` | Entraînement + CV | actif |
|
| 345 |
+
| `src/pipeline.py` | Pipeline de construction (blocs + stats) | actif |
|
| 346 |
+
| `src/prediction.py` | Prédiction legacy (app.py) | hérité (début projet) |
|
| 347 |
+
| `supports/Plan-2024_Bureaux-de-vote.pdf` | Documents de référence | optionnel |
|
| 348 |
+
| `supports/zonages_admin_canton.pdf` | Documents de référence | optionnel |
|
app.py
CHANGED
|
@@ -1,7 +1,22 @@
|
|
| 1 |
-
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
|
| 3 |
+
import importlib.util
|
| 4 |
+
from pathlib import Path
|
| 5 |
|
| 6 |
+
|
| 7 |
+
def _load_gradio_module():
|
| 8 |
+
module_path = Path(__file__).resolve().parent / "app" / "gradio_app.py"
|
| 9 |
+
spec = importlib.util.spec_from_file_location("gradio_app_module", module_path)
|
| 10 |
+
if spec is None or spec.loader is None:
|
| 11 |
+
raise RuntimeError(f"Impossible de charger {module_path}")
|
| 12 |
+
module = importlib.util.module_from_spec(spec)
|
| 13 |
+
spec.loader.exec_module(module)
|
| 14 |
+
return module
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
_gradio = _load_gradio_module()
|
| 18 |
+
demo = _gradio.create_interface()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
if __name__ == "__main__":
|
| 22 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|
app/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gradio application package.
|
| 3 |
+
"""
|
app/app.py
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
|
| 6 |
+
# =========================
|
| 7 |
+
# Chargement des données
|
| 8 |
+
# =========================
|
| 9 |
+
|
| 10 |
+
DATA_PATH = "data/processed/bv_features.parquet"
|
| 11 |
+
|
| 12 |
+
df = pd.read_parquet(DATA_PATH)
|
| 13 |
+
df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce") # type: ignore
|
| 14 |
+
df["tour"] = pd.to_numeric(df.get("tour"), errors="coerce").astype("Int64") # type: ignore
|
| 15 |
+
|
| 16 |
+
# -------------------------
|
| 17 |
+
# Filtrage Sète uniquement
|
| 18 |
+
# -------------------------
|
| 19 |
+
# Hypothèse : code_commune INSEE
|
| 20 |
+
SETE_CODE_INSEE = "34301"
|
| 21 |
+
|
| 22 |
+
def resolve_code_commune(df_in: pd.DataFrame) -> tuple[pd.DataFrame, str | None]:
|
| 23 |
+
df_out = df_in.copy()
|
| 24 |
+
if "code_commune" in df_out.columns:
|
| 25 |
+
df_out["code_commune"] = df_out["code_commune"].astype("string")
|
| 26 |
+
return df_out, None
|
| 27 |
+
if "Code de la commune" in df_out.columns:
|
| 28 |
+
df_out = df_out.rename(columns={"Code de la commune": "code_commune"})
|
| 29 |
+
df_out["code_commune"] = df_out["code_commune"].astype("string")
|
| 30 |
+
return df_out, None
|
| 31 |
+
if "code_bv" in df_out.columns:
|
| 32 |
+
df_out["code_commune"] = df_out["code_bv"].astype(str).str.slice(0, 5)
|
| 33 |
+
df_out["code_commune"] = df_out["code_commune"].astype("string")
|
| 34 |
+
valid = df_out["code_commune"].str.len() == 5
|
| 35 |
+
if not valid.any():
|
| 36 |
+
return df_out, "Impossible de dériver code_commune depuis code_bv (format inattendu)."
|
| 37 |
+
return df_out, None
|
| 38 |
+
df_out["code_commune"] = pd.NA
|
| 39 |
+
return df_out, "Aucune colonne commune disponible (code_commune/Code de la commune/code_bv)."
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
df, commune_warning = resolve_code_commune(df)
|
| 43 |
+
df["code_commune"] = (
|
| 44 |
+
df["code_commune"]
|
| 45 |
+
.astype(str)
|
| 46 |
+
.str.replace(".0", "", regex=False)
|
| 47 |
+
.str.replace(r"\D", "", regex=True)
|
| 48 |
+
.str.zfill(5)
|
| 49 |
+
.astype("string")
|
| 50 |
+
)
|
| 51 |
+
df_sete = df[df["code_commune"] == SETE_CODE_INSEE].copy()
|
| 52 |
+
df_sete["tour"] = pd.to_numeric(df_sete["tour"], errors="coerce").astype("Int64")
|
| 53 |
+
|
| 54 |
+
# Colonnes blocs
|
| 55 |
+
BASE_BLOCS = [
|
| 56 |
+
"droite_modere",
|
| 57 |
+
"gauche_modere",
|
| 58 |
+
"gauche_dure",
|
| 59 |
+
"droite_dure",
|
| 60 |
+
"centre",
|
| 61 |
+
"extreme_gauche",
|
| 62 |
+
"extreme_droite",
|
| 63 |
+
"autre",
|
| 64 |
+
]
|
| 65 |
+
BLOC_LABELS = [b for b in BASE_BLOCS if f"part_bloc_{b}" in df_sete.columns]
|
| 66 |
+
BLOC_COLS = [f"part_bloc_{b}" for b in BLOC_LABELS]
|
| 67 |
+
|
| 68 |
+
# =========================
|
| 69 |
+
# Fonctions métier
|
| 70 |
+
# =========================
|
| 71 |
+
|
| 72 |
+
def compute_national_reference(df_all, type_scrutin, tour):
|
| 73 |
+
"""
|
| 74 |
+
Calcule les parts nationales par bloc pour un scrutin et un tour donnés.
|
| 75 |
+
"""
|
| 76 |
+
if not BLOC_COLS:
|
| 77 |
+
return {}
|
| 78 |
+
df_nat = df_all[
|
| 79 |
+
(df_all["type_scrutin"] == type_scrutin)
|
| 80 |
+
& (df_all["tour"] == tour)
|
| 81 |
+
]
|
| 82 |
+
|
| 83 |
+
# pondération par exprimés
|
| 84 |
+
weights = df_nat["exprimes"].replace(0, np.nan)
|
| 85 |
+
|
| 86 |
+
national = {}
|
| 87 |
+
for col in BLOC_COLS:
|
| 88 |
+
national[col] = np.nansum(df_nat[col] * weights) / np.nansum(weights)
|
| 89 |
+
|
| 90 |
+
return national
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def table_sete(type_scrutin, tour):
|
| 94 |
+
if not BLOC_COLS:
|
| 95 |
+
return pd.DataFrame({"info": ["Colonnes part_bloc_* absentes."]})
|
| 96 |
+
tour_val = pd.to_numeric(tour, errors="coerce")
|
| 97 |
+
if pd.isna(tour_val):
|
| 98 |
+
return pd.DataFrame({"info": ["Tour invalide."]})
|
| 99 |
+
# données locales
|
| 100 |
+
local = df_sete[
|
| 101 |
+
(df_sete["type_scrutin"] == type_scrutin)
|
| 102 |
+
& (df_sete["tour"] == int(tour_val))
|
| 103 |
+
].copy()
|
| 104 |
+
|
| 105 |
+
if local.empty:
|
| 106 |
+
return pd.DataFrame({"info": ["Aucune donnée disponible"]})
|
| 107 |
+
|
| 108 |
+
# référence nationale
|
| 109 |
+
nat = compute_national_reference(df, type_scrutin, tour)
|
| 110 |
+
|
| 111 |
+
# construction tableau affiché
|
| 112 |
+
rows = []
|
| 113 |
+
|
| 114 |
+
for _, row in local.iterrows():
|
| 115 |
+
r = {
|
| 116 |
+
"code_bv": row["code_bv"],
|
| 117 |
+
"nom_bv": row.get("nom_bv", ""),
|
| 118 |
+
}
|
| 119 |
+
|
| 120 |
+
for col in BLOC_COLS:
|
| 121 |
+
part = row[col]
|
| 122 |
+
ecart = part - nat.get(col, 0)
|
| 123 |
+
|
| 124 |
+
r[col.replace("part_bloc_", "")] = round(part * 100, 2)
|
| 125 |
+
r[col.replace("part_bloc_", "") + "_ecart_nat"] = round(ecart * 100, 2)
|
| 126 |
+
|
| 127 |
+
rows.append(r)
|
| 128 |
+
|
| 129 |
+
result = pd.DataFrame(rows)
|
| 130 |
+
|
| 131 |
+
# tri par écart extrême droite (exemple)
|
| 132 |
+
if "extreme_droite_ecart_nat" in result.columns:
|
| 133 |
+
result = result.sort_values(
|
| 134 |
+
"extreme_droite_ecart_nat", ascending=False
|
| 135 |
+
)
|
| 136 |
+
|
| 137 |
+
return result
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def get_bv_timeseries(code_bv: str, tour: int | None) -> pd.DataFrame:
|
| 141 |
+
if df_sete.empty or not BLOC_COLS:
|
| 142 |
+
return pd.DataFrame(columns=["date_scrutin"] + BLOC_COLS)
|
| 143 |
+
subset = df_sete[df_sete["code_bv"].astype(str) == str(code_bv)].copy()
|
| 144 |
+
subset["tour"] = pd.to_numeric(subset["tour"], errors="coerce").astype("Int64")
|
| 145 |
+
if tour is not None:
|
| 146 |
+
subset = subset[subset["tour"] == tour]
|
| 147 |
+
subset = subset.dropna(subset=["date_scrutin"]).sort_values("date_scrutin")
|
| 148 |
+
return subset[["date_scrutin"] + BLOC_COLS]
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def plot_bv_timeseries(code_bv: str, tour_choice, bloc_choices=None):
|
| 152 |
+
tour = None if tour_choice == "Tous" else int(tour_choice)
|
| 153 |
+
fig, ax = plt.subplots(figsize=(8, 4))
|
| 154 |
+
if not BLOC_COLS:
|
| 155 |
+
ax.text(0.5, 0.5, "Colonnes part_bloc_* absentes.", ha="center", va="center")
|
| 156 |
+
ax.axis("off")
|
| 157 |
+
return fig
|
| 158 |
+
df_ts = get_bv_timeseries(code_bv, tour)
|
| 159 |
+
if df_ts.empty:
|
| 160 |
+
tours_avail = (
|
| 161 |
+
df_sete[df_sete["code_bv"].astype(str) == str(code_bv)]["tour"]
|
| 162 |
+
.dropna()
|
| 163 |
+
.unique()
|
| 164 |
+
.tolist()
|
| 165 |
+
)
|
| 166 |
+
ax.text(
|
| 167 |
+
0.5,
|
| 168 |
+
0.5,
|
| 169 |
+
f"Aucune donnée après filtre tour={tour}. Valeurs disponibles: {sorted(tours_avail)}",
|
| 170 |
+
ha="center",
|
| 171 |
+
va="center",
|
| 172 |
+
wrap=True,
|
| 173 |
+
)
|
| 174 |
+
ax.axis("off")
|
| 175 |
+
return fig
|
| 176 |
+
|
| 177 |
+
selected = bloc_choices or BLOC_LABELS
|
| 178 |
+
selected_cols = [f"part_bloc_{b}" for b in selected if f"part_bloc_{b}" in df_ts.columns]
|
| 179 |
+
if not selected_cols:
|
| 180 |
+
ax.text(0.5, 0.5, "Aucun bloc sélectionné.", ha="center", va="center")
|
| 181 |
+
ax.axis("off")
|
| 182 |
+
return fig
|
| 183 |
+
for col in selected_cols:
|
| 184 |
+
ax.plot(df_ts["date_scrutin"], df_ts[col], label=col.replace("part_bloc_", ""))
|
| 185 |
+
ax.set_title(f"Évolution politique – BV {code_bv}")
|
| 186 |
+
ax.set_ylabel("Part des voix (exprimés)")
|
| 187 |
+
ax.grid(True, alpha=0.3)
|
| 188 |
+
ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left", borderaxespad=0, fontsize=8)
|
| 189 |
+
fig.autofmt_xdate()
|
| 190 |
+
fig.tight_layout()
|
| 191 |
+
return fig
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
# =========================
|
| 195 |
+
# Interface Gradio
|
| 196 |
+
# =========================
|
| 197 |
+
|
| 198 |
+
def format_bv_label(code_bv: str) -> str:
|
| 199 |
+
code_str = str(code_bv)
|
| 200 |
+
if code_str.isdigit() and code_str.startswith(SETE_CODE_INSEE) and len(code_str) == 9:
|
| 201 |
+
bureau_num = code_str[-4:]
|
| 202 |
+
return f"BV {int(bureau_num)} ({code_str})"
|
| 203 |
+
return code_str
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
bv_values = (
|
| 207 |
+
sorted(df_sete["code_bv"].astype(str).unique().tolist())
|
| 208 |
+
if "code_bv" in df_sete.columns
|
| 209 |
+
else []
|
| 210 |
+
)
|
| 211 |
+
bv_choices = [(format_bv_label(code), code) for code in bv_values]
|
| 212 |
+
scrutins = sorted(df_sete["type_scrutin"].unique())
|
| 213 |
+
tours = sorted(df_sete["tour"].dropna().unique())
|
| 214 |
+
tour_options = ["Tous"] + [str(t) for t in tours]
|
| 215 |
+
status_messages = []
|
| 216 |
+
if commune_warning:
|
| 217 |
+
status_messages.append(commune_warning)
|
| 218 |
+
if df_sete.empty:
|
| 219 |
+
status_messages.append(
|
| 220 |
+
"Aucune ligne pour la commune 34301 (Sète). Vérifie `code_commune` / le filtre."
|
| 221 |
+
)
|
| 222 |
+
if not BLOC_COLS:
|
| 223 |
+
status_messages.append("Colonnes part_bloc_* absentes dans bv_features.")
|
| 224 |
+
missing_blocs = [f"part_bloc_{b}" for b in BASE_BLOCS if f"part_bloc_{b}" not in df_sete.columns]
|
| 225 |
+
if missing_blocs:
|
| 226 |
+
status_messages.append(f"Colonnes blocs manquantes: {', '.join(missing_blocs)}")
|
| 227 |
+
tour_dtype = str(df_sete["tour"].dtype) if "tour" in df_sete.columns else "n/a"
|
| 228 |
+
tour_sample = sorted(df_sete["tour"].dropna().unique().tolist())[:10]
|
| 229 |
+
status_messages.append(f"tour dtype: {tour_dtype}")
|
| 230 |
+
status_messages.append(f"tours disponibles (échantillon): {tour_sample}")
|
| 231 |
+
status_messages.append(
|
| 232 |
+
f"df_sete: {len(df_sete)} lignes, {df_sete['code_bv'].nunique() if 'code_bv' in df_sete.columns else 0} BV"
|
| 233 |
+
)
|
| 234 |
+
status_messages.append(f"blocs actifs: {', '.join(BLOC_LABELS) if BLOC_LABELS else 'aucun'}")
|
| 235 |
+
status_text = "\n".join(f"- {msg}" for msg in status_messages)
|
| 236 |
+
|
| 237 |
+
with gr.Blocks(title="Résultats électoraux – Bureaux de vote de Sète") as app:
|
| 238 |
+
gr.Markdown(
|
| 239 |
+
"""
|
| 240 |
+
# 🗳️ Résultats électoraux – Ville de Sète
|
| 241 |
+
|
| 242 |
+
**Bureaux de vote uniquement – comparaison au niveau national**
|
| 243 |
+
|
| 244 |
+
Les pourcentages sont exprimés en **% des exprimés**.
|
| 245 |
+
Les écarts sont en **points par rapport au national**.
|
| 246 |
+
"""
|
| 247 |
+
)
|
| 248 |
+
if status_text:
|
| 249 |
+
gr.Markdown(f"**Alertes**\n{status_text}")
|
| 250 |
+
|
| 251 |
+
with gr.Tabs():
|
| 252 |
+
with gr.Tab("Bureaux de vote"):
|
| 253 |
+
with gr.Row():
|
| 254 |
+
type_scrutin = gr.Dropdown(
|
| 255 |
+
scrutins,
|
| 256 |
+
label="Type de scrutin",
|
| 257 |
+
value=scrutins[0] if scrutins else None,
|
| 258 |
+
)
|
| 259 |
+
tour = gr.Dropdown(
|
| 260 |
+
tours,
|
| 261 |
+
label="Tour",
|
| 262 |
+
value=tours[0] if tours else None,
|
| 263 |
+
)
|
| 264 |
+
|
| 265 |
+
output = gr.Dataframe(
|
| 266 |
+
label="Bureaux de vote – parts locales et écart au national",
|
| 267 |
+
interactive=False,
|
| 268 |
+
wrap=True,
|
| 269 |
+
)
|
| 270 |
+
|
| 271 |
+
btn = gr.Button("Afficher")
|
| 272 |
+
|
| 273 |
+
btn.click(
|
| 274 |
+
fn=table_sete,
|
| 275 |
+
inputs=[type_scrutin, tour],
|
| 276 |
+
outputs=output,
|
| 277 |
+
)
|
| 278 |
+
|
| 279 |
+
with gr.Tab("Évolution temporelle"):
|
| 280 |
+
bv_selector = gr.Dropdown(
|
| 281 |
+
bv_choices,
|
| 282 |
+
label="Bureau de vote",
|
| 283 |
+
value=bv_values[0] if bv_values else None,
|
| 284 |
+
)
|
| 285 |
+
tour_selector = gr.Dropdown(
|
| 286 |
+
tour_options,
|
| 287 |
+
label="Tour",
|
| 288 |
+
value="Tous",
|
| 289 |
+
)
|
| 290 |
+
blocs_selector = gr.Dropdown(
|
| 291 |
+
BLOC_LABELS,
|
| 292 |
+
label="Blocs à afficher",
|
| 293 |
+
value=BLOC_LABELS,
|
| 294 |
+
multiselect=True,
|
| 295 |
+
)
|
| 296 |
+
plot = gr.Plot(
|
| 297 |
+
value=plot_bv_timeseries(
|
| 298 |
+
bv_values[0] if bv_values else "", "Tous", BLOC_LABELS
|
| 299 |
+
)
|
| 300 |
+
)
|
| 301 |
+
|
| 302 |
+
bv_selector.change(
|
| 303 |
+
fn=plot_bv_timeseries,
|
| 304 |
+
inputs=[bv_selector, tour_selector, blocs_selector],
|
| 305 |
+
outputs=plot,
|
| 306 |
+
)
|
| 307 |
+
tour_selector.change(
|
| 308 |
+
fn=plot_bv_timeseries,
|
| 309 |
+
inputs=[bv_selector, tour_selector, blocs_selector],
|
| 310 |
+
outputs=plot,
|
| 311 |
+
)
|
| 312 |
+
blocs_selector.change(
|
| 313 |
+
fn=plot_bv_timeseries,
|
| 314 |
+
inputs=[bv_selector, tour_selector, blocs_selector],
|
| 315 |
+
outputs=plot,
|
| 316 |
+
)
|
| 317 |
+
|
| 318 |
+
# =========================
|
| 319 |
+
# Lancement
|
| 320 |
+
# =========================
|
| 321 |
+
# Tests manuels:
|
| 322 |
+
# 1) Lancer l'app.
|
| 323 |
+
# 2) Onglet "Évolution temporelle": choisir un BV, tester Tous / Tour 1 / Tour 2.
|
| 324 |
+
# 3) Vérifier que la légende n'occulte pas les courbes et que seuls 8 blocs apparaissent.
|
| 325 |
+
# 4) Vérifier le libellé BV (BV X + code) et les alertes en haut de page.
|
| 326 |
+
|
| 327 |
+
if __name__ == "__main__":
|
| 328 |
+
app.launch()
|
app/gradio_app.py
ADDED
|
@@ -0,0 +1,1645 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import base64
|
| 4 |
+
import io
|
| 5 |
+
import json
|
| 6 |
+
import logging
|
| 7 |
+
import re
|
| 8 |
+
import warnings
|
| 9 |
+
from html import escape
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import Dict, Tuple
|
| 12 |
+
|
| 13 |
+
import gradio as gr
|
| 14 |
+
import joblib
|
| 15 |
+
import numpy as np
|
| 16 |
+
import pandas as pd
|
| 17 |
+
import sqlalchemy as sa
|
| 18 |
+
|
| 19 |
+
from src.constants import CANDIDATE_CATEGORIES
|
| 20 |
+
from src.db.schema import get_engine
|
| 21 |
+
from src.features.build_features import (
|
| 22 |
+
aggregate_by_event,
|
| 23 |
+
compute_national_reference,
|
| 24 |
+
expand_by_category,
|
| 25 |
+
load_elections_long,
|
| 26 |
+
load_mapping,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
LOGGER = logging.getLogger(__name__)
|
| 30 |
+
COMMUNE_CODE_SETE = "34301"
|
| 31 |
+
MODEL_DIR = Path("models")
|
| 32 |
+
FEATURE_COLS_PATH = MODEL_DIR / "feature_columns.json"
|
| 33 |
+
RESIDUAL_INTERVALS_PATH = Path("reports/residual_intervals.json")
|
| 34 |
+
GEO_DIR = Path("data/geo")
|
| 35 |
+
DEFAULT_TARGETS = [
|
| 36 |
+
("municipales", 2026),
|
| 37 |
+
("legislatives", 2027),
|
| 38 |
+
("presidentielles", 2027),
|
| 39 |
+
]
|
| 40 |
+
FEATURE_CACHE: Dict[Tuple[str, int], Tuple[pd.DataFrame, Dict[str, Dict[Tuple[str, str], float]]]] = {}
|
| 41 |
+
ELECTION_KEY_SEP = "|"
|
| 42 |
+
ELECTION_TYPE_LABELS = {
|
| 43 |
+
"municipales": "Municipales",
|
| 44 |
+
"legislatives": "Législatives",
|
| 45 |
+
"presidentielles": "Présidentielles",
|
| 46 |
+
"europeennes": "Européennes",
|
| 47 |
+
"regionales": "Régionales",
|
| 48 |
+
"departementales": "Départementales",
|
| 49 |
+
}
|
| 50 |
+
HISTORY_OUTPUT_COLUMNS = ["categorie", "score_%"]
|
| 51 |
+
PREDICTION_OUTPUT_COLUMNS = ["categorie", "nombre"]
|
| 52 |
+
INTERVAL_OUTPUT_COLUMNS = ["categorie", "baseline_%", "min_%", "max_%", "baseline", "min", "max"]
|
| 53 |
+
SIM_OUTPUT_COLUMNS = ["categorie", "baseline", "apres_transfert", "delta"]
|
| 54 |
+
OPPORTUNITY_OUTPUT_COLUMNS = [
|
| 55 |
+
"bureau",
|
| 56 |
+
"gain_cible",
|
| 57 |
+
"score_base",
|
| 58 |
+
"score_apres",
|
| 59 |
+
"top_base",
|
| 60 |
+
"top_apres",
|
| 61 |
+
"bascule",
|
| 62 |
+
]
|
| 63 |
+
DISPLAY_CATEGORY_ORDER = [
|
| 64 |
+
"extreme_gauche",
|
| 65 |
+
"gauche_dure",
|
| 66 |
+
"gauche_modere",
|
| 67 |
+
"centre",
|
| 68 |
+
"droite_modere",
|
| 69 |
+
"droite_dure",
|
| 70 |
+
"extreme_droite",
|
| 71 |
+
]
|
| 72 |
+
PREDICTION_CATEGORY_ORDER = DISPLAY_CATEGORY_ORDER + ["blancs", "nuls", "abstention"]
|
| 73 |
+
DISPLAY_CATEGORY_LABELS = {
|
| 74 |
+
"extreme_gauche": "extrême-gauche",
|
| 75 |
+
"gauche_dure": "gauche dure",
|
| 76 |
+
"gauche_modere": "gauche modérée",
|
| 77 |
+
"centre": "centre",
|
| 78 |
+
"droite_modere": "droite modérée",
|
| 79 |
+
"droite_dure": "droite dure",
|
| 80 |
+
"extreme_droite": "extrême-droite",
|
| 81 |
+
"blancs": "blancs",
|
| 82 |
+
"nuls": "nuls",
|
| 83 |
+
"abstention": "abstentions",
|
| 84 |
+
}
|
| 85 |
+
DISPLAY_CATEGORY_COLORS = {
|
| 86 |
+
"extreme_gauche": "#7f1d1d",
|
| 87 |
+
"gauche_dure": "#dc2626",
|
| 88 |
+
"gauche_modere": "#f472b6",
|
| 89 |
+
"centre": "#facc15",
|
| 90 |
+
"droite_modere": "#60a5fa",
|
| 91 |
+
"droite_dure": "#1e3a8a",
|
| 92 |
+
"extreme_droite": "#111827",
|
| 93 |
+
}
|
| 94 |
+
EXTRA_CATEGORY_COLORS = {
|
| 95 |
+
"blancs": "#e5e7eb",
|
| 96 |
+
"nuls": "#9ca3af",
|
| 97 |
+
"abstention": "#6b7280",
|
| 98 |
+
}
|
| 99 |
+
DISPLAY_LABEL_COLORS = {
|
| 100 |
+
DISPLAY_CATEGORY_LABELS[key]: color for key, color in DISPLAY_CATEGORY_COLORS.items()
|
| 101 |
+
}
|
| 102 |
+
DISPLAY_LABEL_COLORS.update(
|
| 103 |
+
{DISPLAY_CATEGORY_LABELS[key]: color for key, color in EXTRA_CATEGORY_COLORS.items()}
|
| 104 |
+
)
|
| 105 |
+
CATEGORY_LABEL_TO_KEY = {label: key for key, label in DISPLAY_CATEGORY_LABELS.items()}
|
| 106 |
+
TRANSFER_CATEGORY_LABELS = [DISPLAY_CATEGORY_LABELS[key] for key in PREDICTION_CATEGORY_ORDER]
|
| 107 |
+
DEFAULT_RESIDUAL_SPREAD = 0.03
|
| 108 |
+
INTERVAL_BANDS = {
|
| 109 |
+
"80% (p10-p90)": ("q10", "q90"),
|
| 110 |
+
"90% (p05-p95)": ("q05", "q95"),
|
| 111 |
+
}
|
| 112 |
+
NEUTRAL_MARGIN_SHARE = 0.10
|
| 113 |
+
|
| 114 |
+
try:
|
| 115 |
+
from numpy import RankWarning as NP_RANK_WARNING # type: ignore[attr-defined]
|
| 116 |
+
except Exception:
|
| 117 |
+
class NP_RANK_WARNING(UserWarning):
|
| 118 |
+
pass
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def ordered_categories() -> list[str]:
|
| 122 |
+
return [cat for cat in DISPLAY_CATEGORY_ORDER if cat in CANDIDATE_CATEGORIES]
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def load_residual_intervals(path: Path = RESIDUAL_INTERVALS_PATH) -> Dict[str, object]:
|
| 126 |
+
if not path.exists():
|
| 127 |
+
return {}
|
| 128 |
+
try:
|
| 129 |
+
payload = json.loads(path.read_text())
|
| 130 |
+
except Exception:
|
| 131 |
+
return {}
|
| 132 |
+
if isinstance(payload, dict):
|
| 133 |
+
return payload
|
| 134 |
+
return {}
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def get_interval_bounds(
|
| 138 |
+
residuals: Dict[str, Dict[str, float]],
|
| 139 |
+
category: str,
|
| 140 |
+
band_label: str,
|
| 141 |
+
) -> Tuple[float, float]:
|
| 142 |
+
keys = INTERVAL_BANDS.get(band_label, ("q10", "q90"))
|
| 143 |
+
cat_resid = residuals.get(category, {})
|
| 144 |
+
low = cat_resid.get(keys[0])
|
| 145 |
+
high = cat_resid.get(keys[1])
|
| 146 |
+
if low is None or high is None:
|
| 147 |
+
return -DEFAULT_RESIDUAL_SPREAD, DEFAULT_RESIDUAL_SPREAD
|
| 148 |
+
return float(low), float(high)
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def build_interval_table(
    shares_by_cat: Dict[str, float],
    exprimes_total: int,
    residuals: Dict[str, Dict[str, float]],
    band_label: str,
) -> pd.DataFrame:
    """Build a per-category table of baseline shares/counts with interval bounds.

    Shares are clipped to [0, 1] after applying the residual bounds; counts are
    rounded against *exprimes_total*. Inverted bounds are swapped so min <= max.
    """
    records = []
    for category in ordered_categories():
        baseline_share = float(shares_by_cat.get(category, 0.0))
        lo_resid, hi_resid = get_interval_bounds(residuals, category, band_label)
        lo_share = float(np.clip(baseline_share + lo_resid, 0.0, 1.0))
        hi_share = float(np.clip(baseline_share + hi_resid, 0.0, 1.0))
        baseline_count = int(round(baseline_share * exprimes_total))
        lo_count = int(round(lo_share * exprimes_total))
        hi_count = int(round(hi_share * exprimes_total))
        # Guard against inverted quantiles: keep (min, max) ordered.
        if lo_count > hi_count:
            lo_count, hi_count = hi_count, lo_count
            lo_share, hi_share = hi_share, lo_share
        records.append(
            {
                "categorie": DISPLAY_CATEGORY_LABELS.get(category, category),
                "baseline_%": round(baseline_share * 100, 1),
                "min_%": round(lo_share * 100, 1),
                "max_%": round(hi_share * 100, 1),
                "baseline": baseline_count,
                "min": lo_count,
                "max": hi_count,
            }
        )
    return pd.DataFrame(records, columns=INTERVAL_OUTPUT_COLUMNS)
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def build_interval_chart(
|
| 184 |
+
df: pd.DataFrame,
|
| 185 |
+
*,
|
| 186 |
+
value_col: str = "baseline",
|
| 187 |
+
low_col: str = "min",
|
| 188 |
+
high_col: str = "max",
|
| 189 |
+
color_map: Dict[str, str] | None = None,
|
| 190 |
+
ylabel: str = "Nombre d'électeurs",
|
| 191 |
+
):
|
| 192 |
+
try:
|
| 193 |
+
import matplotlib.pyplot as plt
|
| 194 |
+
except Exception:
|
| 195 |
+
return None
|
| 196 |
+
if df.empty or value_col not in df.columns:
|
| 197 |
+
return None
|
| 198 |
+
labels = df["categorie"].astype(str).tolist()
|
| 199 |
+
values = df[value_col].astype(float).to_numpy()
|
| 200 |
+
low_vals = df[low_col].astype(float).to_numpy()
|
| 201 |
+
high_vals = df[high_col].astype(float).to_numpy()
|
| 202 |
+
lower_err = np.maximum(0.0, values - low_vals)
|
| 203 |
+
upper_err = np.maximum(0.0, high_vals - values)
|
| 204 |
+
yerr = np.vstack([lower_err, upper_err])
|
| 205 |
+
colors = [color_map.get(label, "#3b82f6") for label in labels] if color_map else "#3b82f6"
|
| 206 |
+
plt.figure(figsize=(6, 3))
|
| 207 |
+
plt.bar(labels, values, color=colors, yerr=yerr, capsize=4)
|
| 208 |
+
plt.xticks(rotation=30, ha="right")
|
| 209 |
+
plt.ylabel(ylabel)
|
| 210 |
+
plt.tight_layout()
|
| 211 |
+
return plt
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def apply_transfers(
    counts: Dict[str, int],
    total_inscrits: int,
    transfers: list[Tuple[str, str, float]],
) -> Dict[str, int]:
    """Apply (source, target, pct-of-inscrits) vote transfers to counts.

    Each transfer moves round(total_inscrits * pct / 100) voters, capped at the
    source's current (non-negative) stock. Self-transfers and non-positive
    percentages are ignored. Returns a new dict; *counts* is not mutated.
    """
    result = {category: int(value) for category, value in counts.items()}
    for src, dst, pct in transfers:
        if pct <= 0 or src == dst:
            continue  # ignore no-op and negative transfers
        requested = int(round(total_inscrits * float(pct) / 100.0))
        if requested <= 0:
            continue
        stock = max(0, int(result.get(src, 0)))
        moved = min(stock, requested)
        result[src] = stock - moved
        result[dst] = int(result.get(dst, 0)) + moved
    return result
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def build_simulation_table(
    baseline: Dict[str, int],
    updated: Dict[str, int],
) -> pd.DataFrame:
    """Side-by-side table of baseline vs post-transfer counts per category."""
    records = []
    for category in PREDICTION_CATEGORY_ORDER:
        before = int(baseline.get(category, 0))
        after = int(updated.get(category, 0))
        records.append(
            {
                "categorie": DISPLAY_CATEGORY_LABELS.get(category, category),
                "baseline": before,
                "apres_transfert": after,
                "delta": after - before,
            }
        )
    return pd.DataFrame(records, columns=SIM_OUTPUT_COLUMNS)
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def load_geojson_features(geo_dir: Path = GEO_DIR) -> list[dict]:
    """Collect every GeoJSON feature from *.geojson / *.json files in geo_dir.

    Unreadable or invalid files are skipped silently; a missing directory
    yields an empty list.
    """
    if not geo_dir.exists():
        return []
    collected: list[dict] = []
    for candidate in sorted(geo_dir.glob("*.geojson")) + sorted(geo_dir.glob("*.json")):
        try:
            payload = json.loads(candidate.read_text())
        except Exception:
            continue  # skip unreadable / invalid files
        if isinstance(payload, dict):
            collected.extend(payload.get("features", []))
    return collected
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
def extract_bureau_number(label: str | None) -> int | None:
|
| 268 |
+
if not label:
|
| 269 |
+
return None
|
| 270 |
+
match = re.search(r"(\d+)", str(label))
|
| 271 |
+
if not match:
|
| 272 |
+
return None
|
| 273 |
+
try:
|
| 274 |
+
return int(match.group(1))
|
| 275 |
+
except ValueError:
|
| 276 |
+
return None
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
def match_bureau_code(commune_code: str, bureau_num: int, available_codes: set[str]) -> str:
    """Resolve a bureau number against known codes.

    Tries the hyphenated 'INSEE-NNNN' form first, then the compact
    'INSEENNNN' form; if neither is known, returns the compact form anyway.
    """
    padded = str(bureau_num).zfill(4)
    hyphenated = f"{commune_code}-{padded}"
    compact = f"{commune_code}{padded}"
    if hyphenated in available_codes:
        return hyphenated
    if compact in available_codes:
        return compact
    return compact
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
def _iter_coords(geom: dict) -> list[Tuple[float, float]]:
|
| 289 |
+
coords = []
|
| 290 |
+
geom_type = geom.get("type")
|
| 291 |
+
if geom_type == "Polygon":
|
| 292 |
+
for ring in geom.get("coordinates", []):
|
| 293 |
+
coords.extend([(lon, lat) for lon, lat in ring])
|
| 294 |
+
elif geom_type == "MultiPolygon":
|
| 295 |
+
for polygon in geom.get("coordinates", []):
|
| 296 |
+
for ring in polygon:
|
| 297 |
+
coords.extend([(lon, lat) for lon, lat in ring])
|
| 298 |
+
return coords
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
def geojson_bounds(features: list[dict]) -> Tuple[Tuple[float, float], Tuple[float, float]] | None:
    """Return ((min_lat, min_lon), (max_lat, max_lon)) over all features.

    Returns None when no coordinates can be extracted.
    """
    all_lons: list[float] = []
    all_lats: list[float] = []
    for feat in features:
        geometry = feat.get("geometry") or {}
        for lon, lat in _iter_coords(geometry):
            all_lons.append(lon)
            all_lats.append(lat)
    if not all_lons or not all_lats:
        return None
    return (min(all_lats), min(all_lons)), (max(all_lats), max(all_lons))
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
def build_prediction_table_from_counts(counts_by_cat: Dict[str, int]) -> pd.DataFrame:
    """Table of predicted voter counts per category plus blancs/nuls/abstention."""
    records = [
        {
            "categorie": DISPLAY_CATEGORY_LABELS.get(category, category),
            "nombre": int(counts_by_cat.get(category, 0)),
        }
        for category in ordered_categories()
    ]
    for special in ("blancs", "nuls", "abstention"):
        records.append(
            {
                "categorie": DISPLAY_CATEGORY_LABELS[special],
                "nombre": int(counts_by_cat.get(special, 0)),
            }
        )
    return pd.DataFrame(records, columns=PREDICTION_OUTPUT_COLUMNS)
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
def chart_base64_from_df(
|
| 329 |
+
df: pd.DataFrame,
|
| 330 |
+
value_col: str,
|
| 331 |
+
ylabel: str,
|
| 332 |
+
color_map: Dict[str, str],
|
| 333 |
+
) -> str | None:
|
| 334 |
+
try:
|
| 335 |
+
import matplotlib.pyplot as plt
|
| 336 |
+
except Exception:
|
| 337 |
+
return None
|
| 338 |
+
if df.empty or value_col not in df.columns:
|
| 339 |
+
return None
|
| 340 |
+
labels = df["categorie"].astype(str).tolist()
|
| 341 |
+
values = pd.to_numeric(df[value_col], errors="coerce").fillna(0).tolist()
|
| 342 |
+
colors = [color_map.get(label, "#3b82f6") for label in labels]
|
| 343 |
+
fig, ax = plt.subplots(figsize=(4.5, 3.2))
|
| 344 |
+
ax.barh(labels, values, color=colors)
|
| 345 |
+
ax.invert_yaxis()
|
| 346 |
+
ax.set_xlabel(ylabel)
|
| 347 |
+
ax.tick_params(axis="y", labelsize=8)
|
| 348 |
+
fig.tight_layout()
|
| 349 |
+
buf = io.BytesIO()
|
| 350 |
+
fig.savefig(buf, format="png", dpi=150)
|
| 351 |
+
plt.close(fig)
|
| 352 |
+
return base64.b64encode(buf.getvalue()).decode("ascii")
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
def build_map_popup_html(
|
| 356 |
+
bureau_label: str,
|
| 357 |
+
table_df: pd.DataFrame,
|
| 358 |
+
chart_b64: str | None,
|
| 359 |
+
meta: str | None,
|
| 360 |
+
) -> str:
|
| 361 |
+
title_html = f"<strong>{escape(bureau_label)}</strong>"
|
| 362 |
+
meta_html = f"<div style='margin:4px 0;'>{escape(meta)}</div>" if meta else ""
|
| 363 |
+
table_html = table_df.to_html(index=False, border=0)
|
| 364 |
+
img_html = ""
|
| 365 |
+
if chart_b64:
|
| 366 |
+
img_html = (
|
| 367 |
+
"<div style='margin-top:6px;'>"
|
| 368 |
+
f"<img src='data:image/png;base64,{chart_b64}' style='width:320px;height:auto;'/>"
|
| 369 |
+
"</div>"
|
| 370 |
+
)
|
| 371 |
+
return f"<div style='font-size:12px;'>{title_html}{meta_html}{table_html}{img_html}</div>"
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
def build_map_legend_html() -> str:
    """Build the inline HTML legend shown next to the bureau map."""

    def _swatch(color: str, text: str) -> str:
        # One legend entry: a colored square followed by its caption.
        return (
            f"<span style='display:inline-flex;align-items:center;margin-right:10px;'>"
            f"<span style='width:12px;height:12px;background:{color};display:inline-block;margin-right:6px;border:1px solid #111827;'></span>"
            f"{text}</span>"
        )

    entries = [
        _swatch(
            DISPLAY_CATEGORY_COLORS.get(key, "#9ca3af"),
            escape(DISPLAY_CATEGORY_LABELS.get(key, key)),
        )
        for key in DISPLAY_CATEGORY_ORDER
    ]
    entries.append(_swatch("#ffffff", "écart gauche/droite ≤ 10%"))
    entries.append(_swatch("#9ca3af", "données indisponibles"))
    entries.append("<span style='font-size:12px;color:#6b7280;'>abstention non utilisée pour la couleur</span>")
    return "<div style='margin-bottom:8px;'>" + " ".join(entries) + "</div>"
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
def build_bureau_map_html(
    backend: "PredictorBackend",
    target_type: str,
    target_year: int,
) -> str:
    """Render the folium map of voting bureaux as an HTML string.

    Each bureau polygon is colored by the predicted winning category (white
    when the left/right gap is within NEUTRAL_MARGIN_SHARE, grey when no
    prediction is available) and gets a popup with the prediction table and
    an optional bar chart. Returns an HTML error paragraph when folium or the
    geo data is unavailable.
    """
    try:
        import folium
    except Exception:
        return "<p>Folium n'est pas disponible. Installe-le via requirements.txt.</p>"

    features = load_geojson_features()
    if not features:
        return "<p>Aucune geojson trouvée dans data/geo.</p>"

    bounds = geojson_bounds(features)
    if bounds is None:
        return "<p>Impossible de calculer l'emprise de la carte.</p>"
    (min_lat, min_lon), (max_lat, max_lon) = bounds
    center = [(min_lat + max_lat) / 2, (min_lon + max_lon) / 2]
    fmap = folium.Map(location=center, zoom_start=13, tiles="cartodbpositron")

    available_codes = set(backend.available_bureaux())
    for feature in features:
        props = feature.get("properties", {})
        label = props.get("name") or "Bureau"
        # Match the feature to a backend bureau code via its numeric suffix.
        bureau_num = extract_bureau_number(label)
        if bureau_num is None:
            code_bv = None
        else:
            code_bv = match_bureau_code(COMMUNE_CODE_SETE, bureau_num, available_codes)

        fill_color = "#9ca3af"  # grey = no prediction available
        popup_html = None
        if code_bv is not None:
            details, _, meta = backend.predict_bureau_details(code_bv, target_type, target_year)
            if details is not None:
                shares = details["shares_by_cat"]
                left_share = float(shares.get("gauche_dure", 0.0) + shares.get("gauche_modere", 0.0))
                right_share = float(shares.get("droite_dure", 0.0) + shares.get("droite_modere", 0.0))
                if abs(left_share - right_share) <= NEUTRAL_MARGIN_SHARE:
                    # Too close to call: draw the bureau in white.
                    fill_color = "#ffffff"
                else:
                    winner = max(shares, key=shares.get)
                    fill_color = DISPLAY_CATEGORY_COLORS.get(winner, fill_color)

                table_df = build_prediction_table_from_counts(details["counts"])
                chart_b64 = chart_base64_from_df(
                    table_df,
                    value_col="nombre",
                    ylabel="Nombre d'electeurs",
                    color_map=DISPLAY_LABEL_COLORS,
                )
                popup_html = build_map_popup_html(str(label), table_df, chart_b64, meta)

        # Default-argument binding captures this iteration's fill_color,
        # avoiding the classic late-binding closure bug inside the loop.
        def _style(_: dict, color=fill_color):
            return {
                "fillColor": color,
                "color": "#111827",
                "weight": 1,
                "fillOpacity": 0.6,
            }

        geo = folium.GeoJson(feature, style_function=_style)
        if popup_html:
            geo.add_child(folium.Popup(popup_html, max_width=450))
        geo.add_child(folium.Tooltip(str(label)))
        geo.add_to(fmap)

    fmap.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]])
    return fmap._repr_html_()
|
| 468 |
+
|
| 469 |
+
|
| 470 |
+
def _project_rate(
    series: pd.Series,
    years: pd.Series,
    target_year: int,
    *,
    min_points_trend: int = 3,
    clamp_to_observed: bool = True,
) -> float | None:
    """Project a rate to *target_year* from its (year, value) history.

    Fits a linear trend when at least *min_points_trend* points over distinct
    years exist; otherwise (or on fit failure) falls back to the most recent
    observed value. Optionally clamps to the observed range, then always clips
    to [0, 1]. Returns None when no usable points remain after coercion.
    """
    frame = pd.DataFrame(
        {
            "value": pd.to_numeric(series, errors="coerce"),
            "year": pd.to_numeric(years, errors="coerce"),
        }
    ).dropna()
    if frame.empty:
        return None
    vals = frame["value"].to_numpy()
    yrs = frame["year"].to_numpy()
    has_trend = len(frame) >= min_points_trend and len(set(yrs)) >= min_points_trend
    if has_trend:
        with warnings.catch_warnings():
            # np.polyfit warns (RankWarning) on ill-conditioned inputs.
            warnings.simplefilter("ignore", category=NP_RANK_WARNING)
            try:
                slope, intercept = np.polyfit(yrs, vals, 1)
                estimate = slope * target_year + intercept
            except Exception:
                estimate = vals[-1]
    else:
        estimate = vals[-1]
    if clamp_to_observed and len(vals):
        estimate = min(max(estimate, float(np.nanmin(vals))), float(np.nanmax(vals)))
    return float(min(1.0, max(0.0, estimate)))
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
def _allocate_counts(shares: np.ndarray, total: int) -> np.ndarray:
|
| 504 |
+
if total <= 0 or shares.size == 0:
|
| 505 |
+
return np.zeros_like(shares, dtype=int)
|
| 506 |
+
shares = np.clip(shares, 0, None)
|
| 507 |
+
if shares.sum() == 0:
|
| 508 |
+
return np.zeros_like(shares, dtype=int)
|
| 509 |
+
shares = shares / shares.sum()
|
| 510 |
+
raw = shares * total
|
| 511 |
+
floors = np.floor(raw)
|
| 512 |
+
remainder = int(total - floors.sum())
|
| 513 |
+
if remainder > 0:
|
| 514 |
+
order = np.argsort(-(raw - floors))
|
| 515 |
+
for idx in order[:remainder]:
|
| 516 |
+
floors[idx] += 1
|
| 517 |
+
return floors.astype(int)
|
| 518 |
+
|
| 519 |
+
|
| 520 |
+
def load_bureau_event_stats(commune_code: str) -> pd.DataFrame:
    """Load per-bureau turnout/blancs/nuls statistics for every election event.

    Tries a list of candidate files (processed first, then interim) and keeps
    the first one that both matches *commune_code* and carries blancs/nuls
    data; otherwise falls back to the first non-empty match. Returns one row
    per (bureau, election event) with the *_pct columns expressed as
    fractions of inscrits, or an empty DataFrame when nothing usable exists.
    """
    # Preferred sources first: the processed "blocs" dataset, then raw interim.
    candidates = [
        Path("data/processed/elections_blocs.parquet"),
        Path("data/processed/elections_blocs.csv"),
        Path("data/interim/elections_long.parquet"),
        Path("data/interim/elections_long.csv"),
    ]
    df = pd.DataFrame()
    best = pd.DataFrame()
    for path in candidates:
        if not path.exists():
            continue
        if path.suffix == ".parquet":
            df = pd.read_parquet(path)
        else:
            df = pd.read_csv(path, sep=";")
        if df.empty:
            continue
        # Harmonise column names across the two schemas (FR vs EN headers).
        if "type_scrutin" not in df.columns and "election_type" in df.columns:
            df["type_scrutin"] = df["election_type"]
        if "annee" not in df.columns and "election_year" in df.columns:
            df["annee"] = df["election_year"]
        if "tour" not in df.columns and "round" in df.columns:
            df["tour"] = df["round"]
        df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce")
        for col in ["inscrits", "votants", "blancs", "nuls"]:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors="coerce")
            else:
                df[col] = np.nan
        # Restrict to the requested commune, via explicit column or code prefix.
        if "code_commune" in df.columns:
            df["code_commune"] = df["code_commune"].astype(str)
            df = df[df["code_commune"] == str(commune_code)]
        else:
            df = df[df["code_bv"].astype(str).str.startswith(str(commune_code))]
        df = df.dropna(subset=["code_bv"])
        if df.empty:
            continue
        # Prefer a source that actually carries blancs/nuls; otherwise keep
        # the first non-empty one as a fallback and continue looking.
        has_blancs = df["blancs"].notna().any() or df["nuls"].notna().any()
        if has_blancs:
            best = df
            break
        if best.empty:
            best = df
    df = best
    if df.empty:
        return df
    # One row per (bureau, event); max() collapses duplicated candidate rows.
    group_cols = [col for col in ["code_bv", "type_scrutin", "annee", "tour", "date_scrutin"] if col in df.columns]
    agg = df.groupby(group_cols, as_index=False).agg(
        inscrits=("inscrits", "max"),
        votants=("votants", "max"),
        blancs=("blancs", "max"),
        nuls=("nuls", "max"),
    )
    if "date_scrutin" in agg.columns:
        agg = agg.sort_values("date_scrutin")
    agg["election_type"] = agg.get("type_scrutin")
    agg["election_type"] = agg["election_type"].astype("string").str.strip().str.lower()
    agg["election_year"] = pd.to_numeric(agg.get("annee"), errors="coerce")
    agg["round"] = pd.to_numeric(agg.get("tour"), errors="coerce").fillna(1).astype(int)
    # Rates as fractions of inscrits; zero inscrits becomes NaN, not inf.
    base_inscrits = agg["inscrits"].replace(0, np.nan)
    agg["turnout_pct"] = agg["votants"] / base_inscrits
    agg["blancs_pct"] = agg["blancs"] / base_inscrits
    agg["nuls_pct"] = agg["nuls"] / base_inscrits
    return agg[
        [
            "code_bv",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
            "inscrits",
            "votants",
            "blancs",
            "nuls",
            "turnout_pct",
            "blancs_pct",
            "nuls_pct",
        ]
    ]
|
| 600 |
+
|
| 601 |
+
|
| 602 |
+
def load_commune_event_stats(commune_code: str) -> pd.DataFrame:
    """Load commune-level turnout/blancs/nuls statistics per election event.

    Reads the first non-empty processed stats file, filters to *commune_code*
    (a 'code_commune' column is required), and derives the *_pct columns as
    fractions of inscrits when they are not already present. Returns an empty
    DataFrame when no usable file or no matching rows exist.
    """
    candidates = [
        Path("data/processed/commune_event_stats.parquet"),
        Path("data/processed/commune_event_stats.csv"),
    ]
    df = pd.DataFrame()
    for path in candidates:
        if not path.exists():
            continue
        if path.suffix == ".parquet":
            df = pd.read_parquet(path)
        else:
            df = pd.read_csv(path, sep=";")
        if not df.empty:
            break
    if df.empty:
        return df
    # Harmonise column names across the two schemas (FR vs EN headers).
    if "type_scrutin" not in df.columns and "election_type" in df.columns:
        df["type_scrutin"] = df["election_type"]
    if "annee" not in df.columns and "election_year" in df.columns:
        df["annee"] = df["election_year"]
    if "tour" not in df.columns and "round" in df.columns:
        df["tour"] = df["round"]
    df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce")
    for col in ["inscrits", "votants", "blancs", "nuls"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        else:
            df[col] = np.nan
    if "code_commune" in df.columns:
        df["code_commune"] = df["code_commune"].astype(str)
        df = df[df["code_commune"] == str(commune_code)]
    else:
        # Without a commune column we cannot filter safely: bail out.
        return pd.DataFrame()
    if df.empty:
        return df
    # Rates as fractions of inscrits; zero inscrits becomes NaN, not inf.
    base_inscrits = df["inscrits"].replace(0, np.nan)
    if "turnout_pct" not in df.columns:
        df["turnout_pct"] = df["votants"] / base_inscrits
    if "blancs_pct" not in df.columns:
        df["blancs_pct"] = df["blancs"] / base_inscrits
    if "nuls_pct" not in df.columns:
        df["nuls_pct"] = df["nuls"] / base_inscrits
    df["election_type"] = df["type_scrutin"].astype("string").str.strip().str.lower()
    df["election_year"] = pd.to_numeric(df.get("annee"), errors="coerce")
    df["round"] = pd.to_numeric(df.get("tour"), errors="coerce").fillna(1).astype(int)
    return df[
        [
            "code_commune",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
            "inscrits",
            "votants",
            "blancs",
            "nuls",
            "turnout_pct",
            "blancs_pct",
            "nuls_pct",
        ]
    ]
|
| 664 |
+
|
| 665 |
+
|
| 666 |
+
def format_backend_label(backend_kind: str) -> str:
    """Human-readable name of the storage backend for the UI."""
    if backend_kind == "postgres":
        return "PostgreSQL"
    return "fichiers locaux"
|
| 668 |
+
|
| 669 |
+
|
| 670 |
+
def format_election_type_label(election_type: str) -> str:
    """Human-readable label for an election type, Title Case fallback."""
    known = ELECTION_TYPE_LABELS.get(election_type)
    return known if known else str(election_type).replace("_", " ").title()
|
| 675 |
+
|
| 676 |
+
|
| 677 |
+
def format_election_label(
    election_type: str,
    election_year: int,
    round_num: int,
    date_scrutin: pd.Timestamp | None = None,
) -> str:
    """Display label like 'Municipales 2020 - Tour 1 (2020-03-15)'.

    The date suffix is omitted when *date_scrutin* is None or NaT.
    """
    text = f"{format_election_type_label(election_type)} {election_year} - Tour {round_num}"
    if date_scrutin is not None and not pd.isna(date_scrutin):
        iso_day = pd.to_datetime(date_scrutin).date().isoformat()
        text = f"{text} ({iso_day})"
    return text
|
| 688 |
+
|
| 689 |
+
|
| 690 |
+
def format_election_key(election_type: str, election_year: int, round_num: int) -> str:
    """Serialise an election event into its dropdown key (type|year|round)."""
    return ELECTION_KEY_SEP.join([str(election_type), str(election_year), str(round_num)])
|
| 692 |
+
|
| 693 |
+
|
| 694 |
+
def parse_election_key(key: str) -> Tuple[str, int, int]:
    """Inverse of format_election_key.

    Raises
    ------
    ValueError
        If the key does not split into exactly three parts, or year/round
        are not integers.
    """
    pieces = key.split(ELECTION_KEY_SEP)
    if len(pieces) != 3:
        raise ValueError(f"Clé d'élection invalide: {key!r}")
    election_type, year_str, round_str = pieces
    return election_type, int(year_str), int(round_str)
|
| 699 |
+
|
| 700 |
+
|
| 701 |
+
def format_bureau_label(code_bv: str, bureau_label: str | None) -> str:
|
| 702 |
+
code = str(code_bv)
|
| 703 |
+
suffix = code.split("-")[-1] if "-" in code else code
|
| 704 |
+
if bureau_label is not None and not pd.isna(bureau_label):
|
| 705 |
+
label = str(bureau_label).strip()
|
| 706 |
+
if label and label != code:
|
| 707 |
+
return f"{label} ({code})"
|
| 708 |
+
return f"Bureau {suffix} ({code})"
|
| 709 |
+
|
| 710 |
+
|
| 711 |
+
def build_bureau_choices(history: pd.DataFrame) -> list[tuple[str, str]]:
    """Dropdown (label, code) pairs for every bureau present in *history*.

    Uses the first known bureau_label per code when the column exists,
    otherwise falls back to generic labels derived from the code alone.
    """
    if history.empty:
        return []
    if "bureau_label" in history.columns:
        first_labels = (
            history[["code_bv", "bureau_label"]]
            .dropna(subset=["code_bv"])
            .drop_duplicates()
            .sort_values("code_bv")
            .groupby("code_bv", as_index=False)["bureau_label"]
            .first()
        )
        pairs = []
        for entry in first_labels.itertuples(index=False):
            pairs.append((format_bureau_label(entry.code_bv, entry.bureau_label), entry.code_bv))
        return pairs
    known_codes = sorted(history["code_bv"].dropna().unique().tolist())
    return [(format_bureau_label(code, None), code) for code in known_codes]
|
| 729 |
+
|
| 730 |
+
|
| 731 |
+
def build_history_choices(history: pd.DataFrame) -> list[tuple[str, str]]:
    """Dropdown (label, key) pairs for every distinct election event in history."""
    if history.empty:
        return []
    events = (
        history[["election_type", "election_year", "round", "date_scrutin"]]
        .dropna(subset=["election_type", "election_year", "round"])
        .drop_duplicates()
        .groupby(["election_type", "election_year", "round"], as_index=False)
        .agg(date_scrutin=("date_scrutin", "min"))
        .sort_values(["election_year", "election_type", "round"])
    )
    choices = []
    for event in events.itertuples(index=False):
        year = int(event.election_year)
        rnd = int(event.round)
        label = format_election_label(event.election_type, year, rnd, event.date_scrutin)
        choices.append((label, format_election_key(event.election_type, year, rnd)))
    return choices
|
| 754 |
+
|
| 755 |
+
|
| 756 |
+
def clean_history_frame(history: pd.DataFrame) -> pd.DataFrame:
    """Normalise types/casing of the raw history frame and drop unusable rows.

    Shares are clipped to [0, 1]; rows missing any key field or whose category
    is not a modelled candidate category are removed.
    """
    if history.empty:
        return history
    out = history.copy()
    out["code_bv"] = out["code_bv"].astype("string").str.strip()
    out["election_type"] = out["election_type"].astype("string").str.strip().str.lower()
    out["category"] = out["category"].astype("string").str.strip().str.lower()
    if "bureau_label" in out.columns:
        out["bureau_label"] = out["bureau_label"].astype("string").str.strip()
    out["election_year"] = pd.to_numeric(out["election_year"], errors="coerce")
    out["round"] = pd.to_numeric(out["round"], errors="coerce").fillna(1)
    out["date_scrutin"] = pd.to_datetime(out["date_scrutin"], errors="coerce")
    for share_col in ("share", "share_nat", "turnout_pct"):
        if share_col in out.columns:
            out[share_col] = pd.to_numeric(out[share_col], errors="coerce").clip(lower=0, upper=1)
    out = out.dropna(subset=["code_bv", "election_type", "election_year", "round", "category"])
    out["election_year"] = out["election_year"].astype(int)
    out["round"] = out["round"].astype(int)
    return out[out["category"].isin(CANDIDATE_CATEGORIES)]
|
| 776 |
+
|
| 777 |
+
|
| 778 |
+
def prepare_history_table(history_slice: pd.DataFrame) -> pd.DataFrame:
    """Aggregate one election slice into per-category display rows (score in %).

    Every modelled category appears exactly once; missing categories get 0.
    """
    if history_slice.empty:
        return pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
    summed = history_slice.groupby("category", as_index=False).agg(share=("share", "sum"))
    table = pd.DataFrame({"category": ordered_categories()}).merge(summed, on="category", how="left")
    table["share"] = pd.to_numeric(table["share"], errors="coerce").fillna(0).clip(lower=0, upper=1)
    table["score_%"] = (table["share"] * 100).round(1)
    table["categorie"] = table["category"].map(DISPLAY_CATEGORY_LABELS).fillna(table["category"])
    return table[HISTORY_OUTPUT_COLUMNS]
|
| 787 |
+
|
| 788 |
+
|
| 789 |
+
def format_history_meta(history_slice: pd.DataFrame) -> str:
    """One-line metadata summary (scrutin date, turnout) for an election slice."""
    if history_slice.empty:
        return ""
    bits: list[str] = []
    known_dates = history_slice["date_scrutin"].dropna()
    if not known_dates.empty:
        first_date = pd.to_datetime(known_dates.iloc[0]).date().isoformat()
        bits.append(f"Date du scrutin : {first_date}")
    turnout = pd.to_numeric(history_slice["turnout_pct"], errors="coerce").dropna()
    if not turnout.empty:
        bits.append(f"Participation : {turnout.iloc[0] * 100:.1f}%")
    return " | ".join(bits)
|
| 801 |
+
|
| 802 |
+
|
| 803 |
+
def _code_bv_full(commune_code: str, bureau_code: str) -> str:
|
| 804 |
+
bureau_code = str(bureau_code).zfill(4)
|
| 805 |
+
return f"{commune_code}-{bureau_code}"
|
| 806 |
+
|
| 807 |
+
|
| 808 |
+
def load_history_from_db(commune_code: str) -> pd.DataFrame:
    """Load the full per-bureau election history for a commune from PostgreSQL.

    Joins local results with bureau/commune/election/category metadata and the
    matching national reference shares. Percentages stored in the DB (0-100)
    are converted to fractions (0-1).

    Raises
    ------
    RuntimeError
        If the commune has no rows in the database.
    """
    engine = get_engine()
    query = sa.text(
        """
        select cm.insee_code as commune_code,
               b.bureau_code,
               b.bureau_label,
               e.election_type,
               e.election_year,
               coalesce(e.round, 1) as round,
               e.date as date_scrutin,
               c.name as category,
               rl.share_pct,
               rl.turnout_pct,
               rn.share_pct as share_nat
        from results_local rl
        join bureaux b on rl.bureau_id = b.id
        join communes cm on b.commune_id = cm.id
        join elections e on rl.election_id = e.id
        join categories c on rl.category_id = c.id
        left join results_national rn on rn.election_id = e.id and rn.category_id = rl.category_id
        where cm.insee_code = :commune
        """
    )
    df = pd.read_sql(query, engine, params={"commune": commune_code})
    if df.empty:
        raise RuntimeError("Aucune donnée dans la base pour la commune demandée.")
    # Build the canonical "INSEE-NNNN" bureau identifier.
    df["code_bv"] = df.apply(lambda r: _code_bv_full(r["commune_code"], r["bureau_code"]), axis=1)
    df["date_scrutin"] = pd.to_datetime(df["date_scrutin"])
    # DB stores percentages (0-100); the app works with fractions (0-1).
    df["share"] = pd.to_numeric(df["share_pct"], errors="coerce") / 100
    df["share_nat"] = pd.to_numeric(df["share_nat"], errors="coerce") / 100
    df["turnout_pct"] = pd.to_numeric(df["turnout_pct"], errors="coerce") / 100
    df["election_year"] = pd.to_numeric(df["election_year"], errors="coerce")
    df["round"] = pd.to_numeric(df["round"], errors="coerce").fillna(1).astype(int)
    return df[
        [
            "commune_code",
            "code_bv",
            "bureau_label",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
            "category",
            "share",
            "share_nat",
            "turnout_pct",
        ]
    ]
|
| 857 |
+
|
| 858 |
+
|
| 859 |
+
def load_history_from_files(commune_code: str) -> pd.DataFrame:
    """Fallback loader: rebuild the per-bureau history from local data files.

    Expands the long elections dataset by candidate-to-category mapping,
    aggregates per event, and merges the national reference shares, producing
    the same column layout as load_history_from_db so downstream code can use
    either backend interchangeably.

    Raises
    ------
    RuntimeError
        If the aggregated local dataset lacks a 'share' column.
    """
    elections_long_all = load_elections_long(
        Path("data/interim/elections_long.parquet"),
        commune_code=commune_code,
    )
    mapping = load_mapping(Path("data/mapping_candidats_blocs.csv"))
    expanded_all = expand_by_category(elections_long_all, mapping)
    local_all = aggregate_by_event(expanded_all)
    # National reference shares are computed over all communes, not just ours.
    nat = compute_national_reference(local_all)

    local = local_all[local_all["commune_code"] == commune_code].copy()
    local = local.merge(nat, on=["election_type", "election_year", "round", "category"], how="left")
    # Columns already in aggregate_by_event/compute_national_reference
    if "share" not in local.columns:
        raise RuntimeError("Colonne share absente du dataset local (fallback fichiers).")
    # File-based data has no per-bureau display names.
    local["bureau_label"] = None
    local["share_nat"] = local.get("share_nat")
    local["turnout_pct"] = local.get("turnout_pct")
    return local.rename(
        columns={
            "annee": "election_year",
            "tour": "round",
        }
    )[
        [
            "commune_code",
            "code_bv",
            "bureau_label",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
            "category",
            "share",
            "share_nat",
            "turnout_pct",
        ]
    ]
|
| 897 |
+
|
| 898 |
+
|
| 899 |
+
def references_from_history(history: pd.DataFrame, target_year: int) -> Dict[str, Dict[Tuple[str, str], float]]:
    """Extract per-(bureau, category) reference shares from past elections.

    Returns two lookup dicts keyed by (code_bv, category):
      - "leg": share from the most recent legislative election before *target_year*;
      - "mun2020": share from the 2020 municipal election (when before *target_year*).
    """
    past = history[history["election_year"] < target_year].copy()

    def _latest_share(subset: pd.DataFrame) -> pd.Series:
        # Most recent share per (bureau, category), ordered by polling date.
        return (
            subset.sort_values("date_scrutin")
            .groupby(["code_bv", "category"])["share"]
            .last()
        )

    leg_mask = past["election_type"] == "legislatives"
    mun_mask = (past["election_type"] == "municipales") & (past["election_year"] == 2020)
    return {
        "leg": _latest_share(past[leg_mask]).to_dict(),
        "mun2020": _latest_share(past[mun_mask]).to_dict(),
    }
|
| 914 |
+
|
| 915 |
+
|
| 916 |
+
def build_features_from_history(history: pd.DataFrame, target_type: str, target_year: int) -> pd.DataFrame:
    """Build one feature row per bureau for predicting a target election.

    Features are lagged values taken from the history strictly before
    *target_year*: last share / deviation-to-national per category (over any
    election, and restricted to *target_type*), last-vs-previous swing, and
    last turnout. Returns an empty DataFrame when there is no prior history.
    """
    hist = history[history["election_year"] < target_year].copy()
    if hist.empty:
        return pd.DataFrame()
    # Chronological order so that .last() picks the most recent observation.
    hist = hist.sort_values("date_scrutin")
    hist["dev_to_nat"] = hist["share"] - hist["share_nat"]

    last_any_share = hist.groupby(["code_bv", "category"])["share"].last()
    last_any_dev = hist.groupby(["code_bv", "category"])["dev_to_nat"].last()
    last_type_share = (
        hist[hist["election_type"] == target_type]
        .groupby(["code_bv", "category"])["share"]
        .last()
    )
    last_type_dev = (
        hist[hist["election_type"] == target_type]
        .groupby(["code_bv", "category"])["dev_to_nat"]
        .last()
    )
    # Swing = change between the two most recent observations (NaN when <2).
    swing_any = (
        hist.groupby(["code_bv", "category"])["share"]
        .apply(lambda s: s.iloc[-1] - s.iloc[-2] if len(s) >= 2 else np.nan)
        .rename("swing_any")
    )
    turnout_any = hist.groupby("code_bv")["turnout_pct"].last()
    turnout_type = (
        hist[hist["election_type"] == target_type]
        .groupby("code_bv")["turnout_pct"]
        .last()
    )

    bureaux = sorted(hist["code_bv"].dropna().unique())
    records = []
    for code_bv in bureaux:
        record = {
            # code_bv is expected to be "<commune_code>-<bureau>".
            "commune_code": str(code_bv).split("-")[0],
            "code_bv": code_bv,
            "election_type": target_type,
            "election_year": target_year,
            "round": 1,
            # Placeholder date: the target election has no real date yet.
            "date_scrutin": f"{target_year}-01-01",
            "prev_turnout_any_lag1": turnout_any.get(code_bv, np.nan),
            "prev_turnout_same_type_lag1": turnout_type.get(code_bv, np.nan),
        }
        for cat in CANDIDATE_CATEGORIES:
            record[f"prev_share_any_lag1_{cat}"] = last_any_share.get((code_bv, cat), np.nan)
            record[f"prev_share_type_lag1_{cat}"] = last_type_share.get((code_bv, cat), np.nan)
            record[f"prev_dev_to_national_any_lag1_{cat}"] = last_any_dev.get((code_bv, cat), np.nan)
            record[f"prev_dev_to_national_type_lag1_{cat}"] = last_type_dev.get((code_bv, cat), np.nan)
            record[f"swing_any_{cat}"] = swing_any.get((code_bv, cat), np.nan)
        records.append(record)
    return pd.DataFrame.from_records(records)
|
| 968 |
+
|
| 969 |
+
|
| 970 |
+
def load_model() -> Path:
    """Resolve the path of the trained model artefact to load.

    Resolution order:
      1. the model named in ``models/best_model.json`` (when its joblib exists);
      2. ``models/hist_gradient_boosting.joblib``;
      3. the first ``*.joblib`` file (sorted) in the model directory.

    Raises:
        FileNotFoundError: when no joblib artefact exists at all.
    """
    manifest = MODEL_DIR / "best_model.json"
    if manifest.exists():
        best_name = None
        try:
            best_name = json.loads(manifest.read_text()).get("name")
        except Exception:
            # Corrupt/unreadable manifest: silently fall through to defaults.
            pass
        if best_name:
            preferred = MODEL_DIR / f"{best_name}.joblib"
            if preferred.exists():
                return preferred
    fallback = MODEL_DIR / "hist_gradient_boosting.joblib"
    if fallback.exists():
        return fallback
    artefacts = sorted(MODEL_DIR.glob("*.joblib"))
    if not artefacts:
        raise FileNotFoundError("Aucun modèle trouvé dans models/. Lancez src/model/train.py.")
    return artefacts[0]
|
| 988 |
+
|
| 989 |
+
|
| 990 |
+
def load_feature_columns(path: Path, df: pd.DataFrame) -> list[str]:
|
| 991 |
+
if path.exists():
|
| 992 |
+
return json.loads(path.read_text())
|
| 993 |
+
exclude = {"commune_code", "code_bv", "election_type", "election_year", "round", "date_scrutin"}
|
| 994 |
+
return [c for c in df.columns if c not in exclude]
|
| 995 |
+
|
| 996 |
+
|
| 997 |
+
def format_delta(value) -> str:
    """Format a numeric delta rounded to one decimal, with an explicit '+' sign.

    Returns "N/A" for None or NaN inputs.
    """
    missing = value is None or (isinstance(value, float) and np.isnan(value))
    if missing:
        return "N/A"
    prefix = "" if value < 0 else "+"
    return f"{prefix}{round(value, 1)}"
|
| 1002 |
+
|
| 1003 |
+
|
| 1004 |
+
class PredictorBackend:
    """Loads election history and the trained model, and produces per-bureau predictions.

    On construction it tries PostgreSQL first and falls back to local files,
    then precomputes commune-level default rates (turnout/blank/null medians)
    used when a bureau has no usable history for the target election type.
    """

    def __init__(self, commune_code: str = COMMUNE_CODE_SETE):
        self.commune_code = commune_code
        self.backend = "local"
        try:
            # Preferred source: PostgreSQL; any failure falls back to files.
            self.history = load_history_from_db(commune_code)
            self.backend = "postgres"
            LOGGER.info("Backend PostgreSQL chargé (%s lignes)", len(self.history))
        except Exception as exc:
            LOGGER.warning("PostgreSQL indisponible (%s) -> fallback fichiers.", exc)
            self.history = load_history_from_files(commune_code)
            self.backend = "files"
            LOGGER.info("Backend fichiers chargé (%s lignes)", len(self.history))
        self.history = clean_history_frame(self.history)
        self.event_stats = load_bureau_event_stats(commune_code)
        self.commune_stats = load_commune_event_stats(commune_code)
        # Median turnout/blank/null rates, overall and per election type,
        # computed on first-round rows when a "round" column is present.
        self.default_rates = {}
        self.default_rates_by_type: dict[str, dict[str, float]] = {}
        stats = self.commune_stats if not self.commune_stats.empty else self.event_stats
        if not stats.empty:
            if "round" in stats.columns:
                round1 = stats[stats["round"] == 1]
                if not round1.empty:
                    stats = round1
            self.default_rates = {
                "turnout_pct": float(stats["turnout_pct"].median(skipna=True)),
                "blancs_pct": float(stats["blancs_pct"].median(skipna=True)),
                "nuls_pct": float(stats["nuls_pct"].median(skipna=True)),
            }
            if "election_type" in stats.columns:
                for etype, group in stats.groupby("election_type"):
                    self.default_rates_by_type[str(etype)] = {
                        "turnout_pct": float(group["turnout_pct"].median(skipna=True)),
                        "blancs_pct": float(group["blancs_pct"].median(skipna=True)),
                        "nuls_pct": float(group["nuls_pct"].median(skipna=True)),
                    }
        self.model_path = load_model()
        self.model = joblib.load(self.model_path)
        # feature cache per target
        # NOTE(review): this per-instance cache appears unused —
        # _get_features_and_refs uses the module-level FEATURE_CACHE instead;
        # confirm before removing.
        self.refs_cache: Dict[Tuple[str, int], Dict[str, Dict[Tuple[str, str], float]]] = {}

    def available_bureaux(self) -> list[str]:
        """Sorted list of bureau codes present in the loaded history."""
        return sorted(self.history["code_bv"].dropna().unique().tolist())

    def available_targets(self) -> list[Tuple[str, int]]:
        """All (election_type, year) pairs seen in history plus DEFAULT_TARGETS, sorted by year then type."""
        existing = set()
        for row in self.history.itertuples(index=False):
            try:
                year = int(row.election_year)  # type: ignore
            except Exception:
                # Skip rows with unparseable years rather than failing.
                continue
            existing.add((row.election_type, year))
        for t in DEFAULT_TARGETS:
            existing.add(t)
        return sorted(existing, key=lambda x: (x[1], x[0]))

    def _get_features_and_refs(self, target_type: str, target_year: int) -> Tuple[pd.DataFrame, Dict[str, Dict[Tuple[str, str], float]]]:
        """Return (feature_df, reference dict) for a target, memoised in the module-level FEATURE_CACHE."""
        key = (target_type, target_year)
        if key not in FEATURE_CACHE:
            feature_df = build_features_from_history(self.history, target_type, target_year)
            refs = references_from_history(self.history, target_year)
            FEATURE_CACHE[key] = (feature_df, refs)
        return FEATURE_CACHE[key]

    def predict_bureau_details(
        self,
        code_bv: str,
        target_type: str,
        target_year: int,
        inscrits_override: float | None = None,
    ) -> Tuple[Dict[str, object] | None, str, str]:
        """Predict one bureau's result for a target election.

        Returns (details, backend_label, meta):
          - details: dict with predicted shares per category, integer counts
            (including blancs/nuls/abstention) and totals — or None with an
            explanatory message in the second element when prediction is
            impossible (no features, unknown bureau, no registered-voter count).
          - meta: human-readable summary of the totals.
        """
        feature_df, _ = self._get_features_and_refs(target_type, target_year)
        if feature_df.empty:
            return None, "Données insuffisantes", ""
        row = feature_df[feature_df["code_bv"] == code_bv].copy()
        if row.empty:
            return None, "Bureau non trouvé dans l'historique.", ""

        # Align the row with the training columns; missing features become NaN.
        feature_cols = load_feature_columns(FEATURE_COLS_PATH, feature_df)
        missing = [c for c in feature_cols if c not in row.columns]
        for col in missing:
            row[col] = np.nan
        # Multi-output prediction: one share per category. Clip to [0, 1] and
        # renormalise each row so the shares sum to 1 (guarding against zero sums).
        preds = self.model.predict(row[feature_cols])
        preds = np.clip(preds, 0, 1)
        sums = preds.sum(axis=1, keepdims=True)
        sums[sums == 0] = 1
        preds = preds / sums
        preds_share = preds.flatten()

        # NOTE(review): assumes the model's output column order matches
        # CANDIDATE_CATEGORIES — confirm against training code.
        preds_by_cat = {cat: float(preds_share[idx]) for idx, cat in enumerate(CANDIDATE_CATEGORIES)}
        ordered = ordered_categories()
        share_vec = np.array([preds_by_cat.get(cat, 0.0) for cat in ordered], dtype=float)

        # Registered-voter count: explicit positive override wins, otherwise
        # the most recent observed value for this bureau.
        stats = self.event_stats[self.event_stats["code_bv"] == code_bv].sort_values("date_scrutin")
        inscrits_used = None
        if inscrits_override is not None:
            try:
                value = float(inscrits_override)
                if value > 0:
                    inscrits_used = value
            except (TypeError, ValueError):
                inscrits_used = None
        if inscrits_used is None and not stats.empty:
            serie = pd.to_numeric(stats["inscrits"], errors="coerce").dropna()
            if not serie.empty:
                inscrits_used = float(serie.iloc[-1])
        if inscrits_used is None:
            return None, "Inscrits indisponibles pour ce bureau.", ""

        def pick_rate(col: str) -> float:
            """Project a rate (turnout/blancs/nuls) for the target year.

            Preference order for the series to extrapolate from:
            bureau-level stats for the same election type, then commune-level
            stats (same type, else all types), then bureau stats regardless of
            type; falls back to the per-type (then global) median default.
            """
            default = self.default_rates.get(col, 0.0)
            default = 0.0 if default is None or np.isnan(default) else float(default)
            type_default = self.default_rates_by_type.get(target_type, {}).get(col)
            if type_default is None or np.isnan(type_default):
                type_default = default

            bureau_scoped = self.event_stats
            if not bureau_scoped.empty and "round" in bureau_scoped.columns:
                round1 = bureau_scoped[bureau_scoped["round"] == 1]
                if not round1.empty:
                    bureau_scoped = round1

            series = None
            years = None
            if (
                not bureau_scoped.empty
                and col in bureau_scoped.columns
                and "election_type" in bureau_scoped.columns
            ):
                if target_type in bureau_scoped["election_type"].values:
                    mask = bureau_scoped["election_type"] == target_type
                    series = bureau_scoped.loc[mask, col]
                    years = bureau_scoped.loc[mask, "election_year"]

            if series is None and not self.commune_stats.empty and col in self.commune_stats.columns:
                commune_scoped = self.commune_stats
                if "round" in commune_scoped.columns:
                    round1 = commune_scoped[commune_scoped["round"] == 1]
                    if not round1.empty:
                        commune_scoped = round1
                if target_type in commune_scoped["election_type"].values:
                    mask = commune_scoped["election_type"] == target_type
                    series = commune_scoped.loc[mask, col]
                    years = commune_scoped.loc[mask, "election_year"]
                else:
                    series = commune_scoped[col]
                    years = commune_scoped["election_year"]

            if series is None:
                if bureau_scoped.empty or col not in bureau_scoped.columns:
                    return type_default
                series = bureau_scoped[col]
                years = bureau_scoped["election_year"]

            rate = _project_rate(series, years, target_year)
            if rate is None or np.isnan(rate):
                return type_default
            return float(rate)

        turnout_rate = pick_rate("turnout_pct")
        blancs_rate = pick_rate("blancs_pct")
        nuls_rate = pick_rate("nuls_pct")
        # Blank + null ballots cannot exceed turnout: rescale proportionally.
        if blancs_rate + nuls_rate > turnout_rate and (blancs_rate + nuls_rate) > 0:
            scale = turnout_rate / (blancs_rate + nuls_rate)
            blancs_rate *= scale
            nuls_rate *= scale

        # Convert rates into integer headcounts; re-enforce the blank+null
        # constraint after rounding.
        inscrits_total = int(round(inscrits_used))
        votants_total = int(round(inscrits_total * turnout_rate))
        blancs_total = int(round(inscrits_total * blancs_rate))
        nuls_total = int(round(inscrits_total * nuls_rate))
        if blancs_total + nuls_total > votants_total and (blancs_total + nuls_total) > 0:
            scale = votants_total / (blancs_total + nuls_total)
            blancs_total = int(round(blancs_total * scale))
            nuls_total = int(round(nuls_total * scale))
        exprimes_total = max(0, votants_total - blancs_total - nuls_total)
        abstention_total = max(0, inscrits_total - votants_total)

        # Distribute the expressed votes across categories per the share vector.
        bloc_counts = _allocate_counts(share_vec, exprimes_total)
        counts_by_cat = {cat: int(count) for cat, count in zip(ordered, bloc_counts)}
        counts_by_cat.update(
            {
                "blancs": int(blancs_total),
                "nuls": int(nuls_total),
                "abstention": int(abstention_total),
            }
        )
        backend_label = format_backend_label(self.backend)
        meta = (
            f"Inscrits utilisés : {inscrits_total} | Votants : {votants_total} | "
            f"Blancs : {blancs_total} | Nuls : {nuls_total} | Abstentions : {abstention_total}"
        )
        details = {
            "shares_by_cat": preds_by_cat,
            "share_vec": share_vec,
            "ordered": ordered,
            "counts": counts_by_cat,
            "totals": {
                "inscrits": inscrits_total,
                "votants": votants_total,
                "blancs": blancs_total,
                "nuls": nuls_total,
                "abstention": abstention_total,
                "exprimes": exprimes_total,
            },
        }
        return details, backend_label, meta

    def predict_bureau(
        self,
        code_bv: str,
        target_type: str,
        target_year: int,
        inscrits_override: float | None = None,
    ) -> Tuple[pd.DataFrame, str, str]:
        """Tabular wrapper around predict_bureau_details.

        Returns a (categorie, nombre) DataFrame ready for display — political
        categories first, then blancs/nuls/abstention — plus the backend label
        and the totals summary string. An empty DataFrame signals failure.
        """
        details, backend_label, meta = self.predict_bureau_details(
            code_bv,
            target_type,
            target_year,
            inscrits_override,
        )
        if details is None:
            return pd.DataFrame(), backend_label, ""
        counts_by_cat = details["counts"]
        ordered = details["ordered"]
        rows = []
        for cat in ordered:
            rows.append(
                {
                    "categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat),
                    "nombre": int(counts_by_cat.get(cat, 0)),
                }
            )
        for extra in ["blancs", "nuls", "abstention"]:
            rows.append(
                {
                    "categorie": DISPLAY_CATEGORY_LABELS[extra],
                    "nombre": int(counts_by_cat.get(extra, 0)),
                }
            )
        return pd.DataFrame(rows), backend_label, meta
|
| 1245 |
+
|
| 1246 |
+
|
| 1247 |
+
def build_bar_chart(
|
| 1248 |
+
df: pd.DataFrame,
|
| 1249 |
+
value_col: str,
|
| 1250 |
+
*,
|
| 1251 |
+
color: str = "#3b82f6",
|
| 1252 |
+
color_map: Dict[str, str] | None = None,
|
| 1253 |
+
category_col: str = "categorie",
|
| 1254 |
+
ylabel: str = "Score (%)",
|
| 1255 |
+
):
|
| 1256 |
+
try:
|
| 1257 |
+
import matplotlib.pyplot as plt
|
| 1258 |
+
except Exception:
|
| 1259 |
+
return None
|
| 1260 |
+
if df.empty or value_col not in df.columns:
|
| 1261 |
+
return None
|
| 1262 |
+
plt.figure(figsize=(6, 3))
|
| 1263 |
+
labels = df[category_col].astype(str).tolist() if category_col in df.columns else []
|
| 1264 |
+
if color_map:
|
| 1265 |
+
colors = [color_map.get(label, color) for label in labels]
|
| 1266 |
+
else:
|
| 1267 |
+
colors = color
|
| 1268 |
+
plt.bar(labels, df[value_col], color=colors)
|
| 1269 |
+
plt.xticks(rotation=30, ha="right")
|
| 1270 |
+
plt.ylabel(ylabel)
|
| 1271 |
+
plt.tight_layout()
|
| 1272 |
+
return plt
|
| 1273 |
+
|
| 1274 |
+
|
| 1275 |
+
def create_interface() -> gr.Blocks:
    """Build the full Gradio application (tabs: Prévisions, Historique, Carte, Stratégie)."""
    backend = PredictorBackend()
    # Dropdown choices are (label, value) pairs; keep both directions mapped.
    bureau_choices = build_bureau_choices(backend.history)
    bureau_labels = [label for label, _ in bureau_choices]
    bureau_map = {label: value for label, value in bureau_choices}
    bureau_label_by_code = {value: label for label, value in bureau_choices}
    targets = backend.available_targets()
    target_labels = [f"{t} {y}" for t, y in targets]
    history_choices = build_history_choices(backend.history)
    history_labels = [label for label, _ in history_choices]
    history_map = {label: value for label, value in history_choices}
    # Default target: the 2026 municipales when available, else the latest known.
    if ("municipales", 2026) in targets:
        default_target = "municipales 2026"
    elif targets:
        default_target = f"{targets[-1][0]} {targets[-1][1]}"
    else:
        default_target = "municipales 2026"
    default_bv = bureau_labels[0] if bureau_labels else None
    default_history = history_labels[-1] if history_labels else None
    backend_label = format_backend_label(backend.backend)
    # CV residual intervals computed at training time (optional artefact).
    residual_payload = load_residual_intervals()
    residuals = residual_payload.get("residuals", {}) if isinstance(residual_payload, dict) else {}
    residual_model = residual_payload.get("model", "inconnu") if isinstance(residual_payload, dict) else "inconnu"
    interval_choices = list(INTERVAL_BANDS.keys()) or ["80% (p10-p90)"]
    interval_default = interval_choices[0]
    bloc_labels = [DISPLAY_CATEGORY_LABELS.get(cat, cat) for cat in ordered_categories()]

    with gr.Blocks(title="Prévision Municipales — Ville de Sète") as demo:
        gr.Markdown(
            """
            # Prévision Municipales — Ville de Sète
            Choisissez un bureau de vote et une élection cible.
            Le modèle estime un volume par catégorie politique, ainsi que les abstentions, blancs et nuls.
            """
        )
        with gr.Tabs():
            # --- Tab 1: point predictions per bureau ---
            with gr.Tab("Prévisions"):
                with gr.Row():
                    bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
                    target_dd = gr.Dropdown(choices=target_labels, value=default_target, label="Élection cible (type année)")
                    inscrits_in = gr.Number(value=None, label="Inscrits (optionnel)", precision=0)
                predict_btn = gr.Button("Prédire")
                source_box = gr.Markdown(value=f"Source des données : {backend_label}")
                output_df = gr.Dataframe(
                    headers=PREDICTION_OUTPUT_COLUMNS,
                    label="Prédictions (nombres)",
                )
                chart = gr.Plot()

            # --- Tab 2: raw historical results (no ML) ---
            with gr.Tab("Historique"):
                gr.Markdown(
                    """
                    Consultation des résultats passés (sans machine learning).
                    Sélectionnez un bureau et une élection pour afficher l'histogramme des parts par tendance politique.
                    """
                )
                with gr.Row():
                    history_bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
                    history_election_dd = gr.Dropdown(
                        choices=history_labels,
                        value=default_history,
                        label="Élection (type année tour)",
                    )
                history_btn = gr.Button("Afficher l'historique")
                history_source = gr.Markdown(value=f"Source des données : {backend_label}")
                history_df = gr.Dataframe(headers=HISTORY_OUTPUT_COLUMNS, label="Résultats historiques")
                history_chart = gr.Plot()
                history_meta = gr.Markdown()

            # --- Tab 3: interactive map of the polling stations ---
            with gr.Tab("Carte"):
                gr.Markdown(
                    """
                    Carte des bureaux de vote de Sète.
                    Cliquez sur un polygone pour afficher la prédiction (table + graphique).
                    """
                )
                map_legend = gr.HTML(value=build_map_legend_html())
                with gr.Row():
                    map_target_dd = gr.Dropdown(
                        choices=target_labels,
                        value=default_target,
                        label="Élection cible (type année)",
                    )
                map_btn = gr.Button("Afficher la carte")
                map_html = gr.HTML(value="<p>Cliquez sur 'Afficher la carte' pour charger la carte.</p>")

            # --- Tab 4: uncertainty intervals + transfer simulator ---
            with gr.Tab("Stratégie"):
                gr.Markdown(
                    """
                    Analyse stratégique par bureau : intervalles d'incertitude issus des résidus CV,
                    puis simulateur de transferts pour estimer des bascules potentielles.
                    """
                )
                with gr.Row():
                    strategy_bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
                    strategy_target_dd = gr.Dropdown(
                        choices=target_labels,
                        value=default_target,
                        label="Élection cible (type année)",
                    )
                    strategy_inscrits_in = gr.Number(value=None, label="Inscrits (optionnel)", precision=0)
                    interval_dd = gr.Dropdown(
                        choices=interval_choices,
                        value=interval_default,
                        label="Intervalle CV",
                    )
                strategy_btn = gr.Button("Analyser l'incertitude")
                interval_source = gr.Markdown(
                    value=(
                        f"Intervalle CV basé sur le modèle : {residual_model}"
                        if residuals
                        else "Intervalle CV indisponible (fallback ±3%)."
                    )
                )
                interval_df = gr.Dataframe(
                    headers=INTERVAL_OUTPUT_COLUMNS,
                    label="Plage empirique par bloc",
                )
                interval_chart = gr.Plot()

                gr.Markdown("### Simulateur de transferts (points d'inscrits)")
                with gr.Row():
                    target_bloc_dd = gr.Dropdown(choices=bloc_labels, value=bloc_labels[0] if bloc_labels else None, label="Bloc cible")
                with gr.Row():
                    source_1_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["abstention"], label="Source 1")
                    target_1_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["droite_dure"], label="Cible 1")
                    delta_1 = gr.Slider(minimum=0, maximum=10, value=3, step=0.1, label="Delta 1 (points %)")
                with gr.Row():
                    source_2_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["droite_modere"], label="Source 2")
                    target_2_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["gauche_modere"], label="Cible 2")
                    delta_2 = gr.Slider(minimum=0, maximum=10, value=3, step=0.1, label="Delta 2 (points %)")
                simulate_btn = gr.Button("Simuler les transferts")
                sim_df = gr.Dataframe(headers=SIM_OUTPUT_COLUMNS, label="Simulation par catégorie")
                sim_chart = gr.Plot()
                opportunity_df = gr.Dataframe(headers=OPPORTUNITY_OUTPUT_COLUMNS, label="Bureaux à potentiel (trié)")

        def _predict(bv_label: str, target_label: str, inscrits_override: float | None):
            """Click handler for the Prévisions tab: table + source line + chart."""
            if not bv_label or not target_label:
                return pd.DataFrame(), "Entrée invalide", None
            code_bv = bureau_map.get(bv_label)
            if not code_bv:
                return pd.DataFrame(), "Bureau invalide", None
            try:
                parts = target_label.split()
                target_type, target_year = parts[0].lower(), int(parts[1])
            except Exception:
                # Malformed label: fall back to the default target.
                target_type, target_year = "municipales", 2026
            df, backend_label, meta = backend.predict_bureau(code_bv, target_type, target_year, inscrits_override)
            plot = build_bar_chart(
                df,
                value_col="nombre",
                ylabel="Nombre d'électeurs",
                color_map=DISPLAY_LABEL_COLORS,
            )
            meta_label = f" | {meta}" if meta else ""
            return df, f"Source des données : {backend_label}{meta_label}", plot

        def _parse_target_label(target_label: str) -> Tuple[str, int]:
            """Parse "type year" dropdown labels; defaults to municipales 2026."""
            try:
                parts = target_label.split()
                return parts[0].lower(), int(parts[1])
            except Exception:
                return "municipales", 2026

        def _map(target_label: str):
            """Click handler for the Carte tab: returns the map as HTML."""
            if not target_label:
                return "<p>Élection invalide.</p>"
            target_type, target_year = _parse_target_label(target_label)
            return build_bureau_map_html(backend, target_type, target_year)

        def _history(bv_label: str, election_label: str):
            """Click handler for the Historique tab: past results for one bureau/election."""
            if not bv_label or not election_label:
                empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
                return empty, "Entrée invalide", None, ""
            code_bv = bureau_map.get(bv_label)
            if not code_bv:
                empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
                return empty, "Bureau invalide", None, ""
            election_key = history_map.get(election_label)
            if not election_key:
                empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
                return empty, "Élection invalide", None, ""
            try:
                election_type, election_year, round_num = parse_election_key(election_key)
            except Exception:
                empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
                return empty, "Élection invalide", None, ""
            history_slice = backend.history[
                (backend.history["code_bv"] == code_bv)
                & (backend.history["election_type"] == election_type)
                & (backend.history["election_year"] == election_year)
                & (backend.history["round"] == round_num)
            ].copy()
            if history_slice.empty:
                empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
                return empty, f"Source des données : {backend_label}", None, "Aucun résultat pour ce bureau."
            table = prepare_history_table(history_slice)
            plot = build_bar_chart(
                table,
                value_col="score_%",
                ylabel="Score (%)",
                color_map=DISPLAY_LABEL_COLORS,
            )
            meta = format_history_meta(history_slice)
            return table, f"Source des données : {backend_label}", plot, meta

        def _strategy_interval(
            bv_label: str,
            target_label: str,
            inscrits_override: float | None,
            band_label: str,
        ):
            """Click handler: empirical CV uncertainty interval per political bloc."""
            empty = pd.DataFrame(columns=INTERVAL_OUTPUT_COLUMNS)
            if not bv_label or not target_label:
                return empty, "Entrée invalide", None
            code_bv = bureau_map.get(bv_label)
            if not code_bv:
                return empty, "Bureau invalide", None
            target_type, target_year = _parse_target_label(target_label)
            details, backend_label_local, _ = backend.predict_bureau_details(
                code_bv,
                target_type,
                target_year,
                inscrits_override,
            )
            if details is None:
                return empty, backend_label_local, None
            totals = details["totals"]
            exprimes_total = int(totals.get("exprimes", 0))
            table = build_interval_table(
                details["shares_by_cat"],
                exprimes_total,
                residuals,
                band_label,
            )
            plot = build_interval_chart(table, color_map=DISPLAY_LABEL_COLORS)
            source = (
                f"Intervalle CV ({band_label}) basé sur le modèle : {residual_model}"
                if residuals
                else "Intervalle CV indisponible (fallback ±3%)."
            )
            return table, source, plot

        def _strategy_simulate(
            bv_label: str,
            target_label: str,
            inscrits_override: float | None,
            bloc_cible_label: str,
            source_1: str,
            target_1: str,
            delta_1_val: float,
            source_2: str,
            target_2: str,
            delta_2_val: float,
        ):
            """Click handler: apply up to two vote transfers and rank bureaux by potential gain."""
            empty_sim = pd.DataFrame(columns=SIM_OUTPUT_COLUMNS)
            empty_oppo = pd.DataFrame(columns=OPPORTUNITY_OUTPUT_COLUMNS)
            if not bv_label or not target_label:
                return empty_sim, None, empty_oppo
            code_bv = bureau_map.get(bv_label)
            if not code_bv:
                return empty_sim, None, empty_oppo
            target_type, target_year = _parse_target_label(target_label)
            details, _, _ = backend.predict_bureau_details(
                code_bv,
                target_type,
                target_year,
                inscrits_override,
            )
            if details is None:
                return empty_sim, None, empty_oppo

            # Keep only well-formed transfers with a strictly positive delta.
            transfers = []
            for src_label, dst_label, delta in [
                (source_1, target_1, delta_1_val),
                (source_2, target_2, delta_2_val),
            ]:
                src_key = CATEGORY_LABEL_TO_KEY.get(src_label)
                dst_key = CATEGORY_LABEL_TO_KEY.get(dst_label)
                if src_key and dst_key and delta and delta > 0:
                    transfers.append((src_key, dst_key, float(delta)))

            counts = details["counts"]
            totals = details["totals"]
            inscrits_total = int(totals.get("inscrits", 0))
            updated = apply_transfers(counts, inscrits_total, transfers)
            sim_table = build_simulation_table(counts, updated)
            sim_plot = build_bar_chart(
                sim_table,
                value_col="apres_transfert",
                ylabel="Nombre d'électeurs",
                color_map=DISPLAY_LABEL_COLORS,
            )

            # Rank every bureau by the gain the transfers give the target bloc,
            # flagging those where the transfers flip the leading bloc.
            target_bloc = CATEGORY_LABEL_TO_KEY.get(bloc_cible_label, bloc_cible_label)
            opp_rows = []
            if target_bloc in ordered_categories():
                for bv_code in backend.available_bureaux():
                    # The manual registered-voter override applies only to the selected bureau.
                    override = inscrits_override if bv_code == code_bv else None
                    bv_details, _, _ = backend.predict_bureau_details(
                        bv_code,
                        target_type,
                        target_year,
                        override,
                    )
                    if bv_details is None:
                        continue
                    base_counts = bv_details["counts"]
                    bv_totals = bv_details["totals"]
                    bv_inscrits = int(bv_totals.get("inscrits", 0))
                    updated_counts = apply_transfers(base_counts, bv_inscrits, transfers)
                    bloc_counts = {cat: int(base_counts.get(cat, 0)) for cat in ordered_categories()}
                    updated_blocs = {cat: int(updated_counts.get(cat, 0)) for cat in ordered_categories()}
                    top_base = max(bloc_counts, key=bloc_counts.get) if bloc_counts else None
                    top_after = max(updated_blocs, key=updated_blocs.get) if updated_blocs else None
                    gain = int(updated_counts.get(target_bloc, 0) - base_counts.get(target_bloc, 0))
                    opp_rows.append(
                        {
                            "bureau": bureau_label_by_code.get(bv_code, bv_code),
                            "gain_cible": gain,
                            "score_base": int(base_counts.get(target_bloc, 0)),
                            "score_apres": int(updated_counts.get(target_bloc, 0)),
                            "top_base": DISPLAY_CATEGORY_LABELS.get(top_base, top_base),
                            "top_apres": DISPLAY_CATEGORY_LABELS.get(top_after, top_after),
                            "bascule": "oui" if top_base != target_bloc and top_after == target_bloc else "non",
                        }
                    )
            opp_df = pd.DataFrame(opp_rows, columns=OPPORTUNITY_OUTPUT_COLUMNS)
            if not opp_df.empty:
                opp_df = opp_df.sort_values(["bascule", "gain_cible"], ascending=[False, False])
            return sim_table, sim_plot, opp_df

        # Wire the buttons to their handlers.
        predict_btn.click(_predict, inputs=[bureau_dd, target_dd, inscrits_in], outputs=[output_df, source_box, chart])
        history_btn.click(
            _history,
            inputs=[history_bureau_dd, history_election_dd],
            outputs=[history_df, history_source, history_chart, history_meta],
        )
        map_btn.click(
            _map,
            inputs=[map_target_dd],
            outputs=[map_html],
        )
        strategy_btn.click(
            _strategy_interval,
            inputs=[strategy_bureau_dd, strategy_target_dd, strategy_inscrits_in, interval_dd],
            outputs=[interval_df, interval_source, interval_chart],
        )
        simulate_btn.click(
            _strategy_simulate,
            inputs=[
                strategy_bureau_dd,
                strategy_target_dd,
                strategy_inscrits_in,
                target_bloc_dd,
                source_1_dd,
                target_1_dd,
                delta_1,
                source_2_dd,
                target_2_dd,
                delta_2,
            ],
            outputs=[sim_df, sim_chart, opportunity_df],
        )
    return demo
|
| 1640 |
+
|
| 1641 |
+
|
| 1642 |
+
if __name__ == "__main__":
    # Script entry point: configure logging, build the UI and serve it on all
    # interfaces, port 7860 (the conventional Gradio/HF Spaces port).
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    demo = create_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
config/communes.yaml
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
communes:
|
| 2 |
+
# - code_insee: "34003"
|
| 3 |
+
# nom: "Agde"
|
| 4 |
+
# - code_insee: "34101"
|
| 5 |
+
# nom: "Florensac"
|
| 6 |
+
# - code_insee: "34199"
|
| 7 |
+
# nom: "Pezenas"
|
| 8 |
+
# - code_insee: "34300"
|
| 9 |
+
# nom: "Servian"
|
| 10 |
+
- code_insee: "34301"
|
| 11 |
+
nom: "Sete"
|
config/nuances.yaml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Mapping des nuances politiques vers les blocs.
|
| 2 |
+
#
|
| 3 |
+
# - base_mapping: chemin vers le CSV historique (optionnel).
|
| 4 |
+
# - overrides: liste d'ajouts/surcharges pour des nuances absentes ou nouvelles.
|
| 5 |
+
# - mapping: mapping complet si vous ne voulez pas utiliser base_mapping.
|
| 6 |
+
|
| 7 |
+
base_mapping: data/mapping_candidats_blocs.csv
|
| 8 |
+
|
| 9 |
+
# Exemple d'ajout/surcharge :
|
| 10 |
+
# overrides:
|
| 11 |
+
# - code_candidature: "XYZ"
|
| 12 |
+
# nom_candidature: "Exemple de nuance"
|
| 13 |
+
# blocs: [gauche_modere, centre]
|
| 14 |
+
overrides: []
|
config/raw_sources.yaml
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
14_EU.csv:
|
| 2 |
+
code_bv_cols:
|
| 3 |
+
- Code de la commune
|
| 4 |
+
- N° de bureau de vote
|
| 5 |
+
date_scrutin: '2014-05-25'
|
| 6 |
+
rename_map:
|
| 7 |
+
Code nuance du candidat: code_candidature
|
| 8 |
+
Exprimés: exprimes
|
| 10 |
+
Inscrits: inscrits
|
| 11 |
+
Nom du candidat: nom_candidature
|
| 12 |
+
Nombre de voix du candidat: voix
|
| 13 |
+
Prénom du candidat: nom_candidature
|
| 14 |
+
Voix: voix
|
| 15 |
+
Votants: votants
|
| 16 |
+
tour_column: N° tour
|
| 17 |
+
type_scrutin: europeennes
|
| 18 |
+
14_MN14_T1T2.csv:
|
| 19 |
+
code_bv_cols:
|
| 20 |
+
- Code commune
|
| 21 |
+
- N° de bureau de vote
|
| 22 |
+
date_scrutin: '2014-03-23'
|
| 23 |
+
rename_map:
|
| 24 |
+
Code nuance de la liste: code_candidature
|
| 25 |
+
Exprimés: exprimes
|
| 26 |
+
Inscrits: inscrits
|
| 27 |
+
Nom du candidat tête de liste: nom_candidature
|
| 28 |
+
Nombre de voix: voix
|
| 29 |
+
Prénom du candidat tête de liste: nom_candidature
|
| 30 |
+
Votants: votants
|
| 31 |
+
tour_column: N° tour
|
| 32 |
+
type_scrutin: municipales
|
| 33 |
+
17_L_T1.csv:
|
| 34 |
+
code_bv_cols:
|
| 35 |
+
- Code de la commune
|
| 36 |
+
- Code du b.vote
|
| 37 |
+
date_scrutin: '2017-06-11'
|
| 38 |
+
rename_map:
|
| 39 |
+
Abstentions: abstentions
|
| 40 |
+
Blancs: blancs
|
| 41 |
+
Exprimés: exprimes
|
| 42 |
+
Inscrits: inscrits
|
| 43 |
+
Nom: nom_candidature
|
| 44 |
+
Nuance: code_candidature
|
| 45 |
+
Nuls: nuls
|
| 46 |
+
Voix: voix
|
| 47 |
+
Votants: votants
|
| 48 |
+
tour: 1
|
| 49 |
+
type_scrutin: legislatives
|
| 50 |
+
17_L_T2.csv:
|
| 51 |
+
code_bv_cols:
|
| 52 |
+
- Code de la commune
|
| 53 |
+
- Code du b.vote
|
| 54 |
+
date_scrutin: '2017-06-18'
|
| 55 |
+
rename_map:
|
| 56 |
+
Abstentions: abstentions
|
| 57 |
+
Blancs: blancs
|
| 58 |
+
Exprimés: exprimes
|
| 59 |
+
Inscrits: inscrits
|
| 60 |
+
Nom: nom_candidature
|
| 61 |
+
Nuance: code_candidature
|
| 62 |
+
Nuls: nuls
|
| 63 |
+
Voix: voix
|
| 64 |
+
Votants: votants
|
| 65 |
+
tour: 2
|
| 66 |
+
type_scrutin: legislatives
|
| 67 |
+
17_PR_T1.csv:
|
| 68 |
+
code_bv_cols:
|
| 69 |
+
- Code de la commune
|
| 70 |
+
- Code du b.vote
|
| 71 |
+
date_scrutin: '2017-04-23'
|
| 72 |
+
rename_map:
|
| 73 |
+
Abstentions: abstentions
|
| 74 |
+
Blancs: blancs
|
| 75 |
+
Code nuance du candidat: code_candidature
|
| 76 |
+
Exprimés: exprimes
|
| 77 |
+
Inscrits: inscrits
|
| 78 |
+
Nom: nom_candidature
|
| 79 |
+
Nuls: nuls
|
| 80 |
+
Voix: voix
|
| 81 |
+
Votants: votants
|
| 82 |
+
tour: 1
|
| 83 |
+
type_scrutin: presidentielles
|
| 84 |
+
17_PR_T2.csv:
|
| 85 |
+
code_bv_cols:
|
| 86 |
+
- Code de la commune
|
| 87 |
+
- Code du b.vote
|
| 88 |
+
date_scrutin: '2017-05-07'
|
| 89 |
+
rename_map:
|
| 90 |
+
Abstentions: abstentions
|
| 91 |
+
Blancs: blancs
|
| 92 |
+
Code nuance du candidat: code_candidature
|
| 93 |
+
Exprimés: exprimes
|
| 94 |
+
Inscrits: inscrits
|
| 95 |
+
Nom: nom_candidature
|
| 96 |
+
Nuls: nuls
|
| 97 |
+
Voix: voix
|
| 98 |
+
Votants: votants
|
| 99 |
+
tour: 2
|
| 100 |
+
type_scrutin: presidentielles
|
| 101 |
+
19_EU.csv:
|
| 102 |
+
code_bv_cols:
|
| 103 |
+
- Code de la commune
|
| 104 |
+
- Code du b.vote
|
| 105 |
+
date_scrutin: '2019-05-26'
|
| 106 |
+
rename_map:
|
| 107 |
+
Abstentions: abstentions
|
| 108 |
+
Blancs: blancs
|
| 109 |
+
Exprimés: exprimes
|
| 110 |
+
Inscrits: inscrits
|
| 111 |
+
Nom Tête de Liste: nom_candidature
|
| 112 |
+
Nuance Liste: code_candidature
|
| 113 |
+
Nuls: nuls
|
| 114 |
+
Voix: voix
|
| 115 |
+
Votants: votants
|
| 116 |
+
tour: 1
|
| 117 |
+
type_scrutin: europeennes
|
| 118 |
+
20_MN_T1.csv:
|
| 119 |
+
code_bv_cols:
|
| 120 |
+
- Code de la commune
|
| 121 |
+
- Code B.Vote
|
| 122 |
+
date_scrutin: '2020-03-15'
|
| 123 |
+
rename_map:
|
| 124 |
+
Abstentions: abstentions
|
| 125 |
+
Blancs: blancs
|
| 126 |
+
Code Nuance: code_candidature
|
| 127 |
+
Exprimés: exprimes
|
| 128 |
+
Inscrits: inscrits
|
| 129 |
+
Liste: nom_candidature
|
| 130 |
+
Nom: nom_candidature
|
| 131 |
+
Nuls: nuls
|
| 132 |
+
Voix: voix
|
| 133 |
+
Votants: votants
|
| 134 |
+
sep: ;
|
| 135 |
+
tour: 1
|
| 136 |
+
type_scrutin: municipales
|
| 137 |
+
20_MN_T2.csv:
|
| 138 |
+
code_bv_cols:
|
| 139 |
+
- Code de la commune
|
| 140 |
+
- Code B.Vote
|
| 141 |
+
date_scrutin: '2020-06-28'
|
| 142 |
+
rename_map:
|
| 143 |
+
Abstentions: abstentions
|
| 144 |
+
Blancs: blancs
|
| 145 |
+
Code Nuance: code_candidature
|
| 146 |
+
Exprimés: exprimes
|
| 147 |
+
Inscrits: inscrits
|
| 148 |
+
Liste: nom_candidature
|
| 149 |
+
Nom: nom_candidature
|
| 150 |
+
Nuls: nuls
|
| 151 |
+
Voix: voix
|
| 152 |
+
Votants: votants
|
| 153 |
+
tour: 2
|
| 154 |
+
type_scrutin: municipales
|
| 155 |
+
21_DEP_T1.csv:
|
| 156 |
+
code_bv_cols:
|
| 157 |
+
- Code de la commune
|
| 158 |
+
- Code du b.vote
|
| 159 |
+
date_scrutin: '2021-06-20'
|
| 160 |
+
rename_map:
|
| 161 |
+
Abstentions: abstentions
|
| 162 |
+
Binôme: nom_candidature
|
| 163 |
+
Blancs: blancs
|
| 164 |
+
Exprimés: exprimes
|
| 165 |
+
Inscrits: inscrits
|
| 166 |
+
Nuance: code_candidature
|
| 167 |
+
Nuls: nuls
|
| 168 |
+
Voix: voix
|
| 169 |
+
Votants: votants
|
| 170 |
+
tour: 1
|
| 171 |
+
type_scrutin: departementales
|
| 172 |
+
21_DEP_T2.csv:
|
| 173 |
+
code_bv_cols:
|
| 174 |
+
- Code de la commune
|
| 175 |
+
- Code du b.vote
|
| 176 |
+
date_scrutin: '2021-06-27'
|
| 177 |
+
rename_map:
|
| 178 |
+
Abstentions: abstentions
|
| 179 |
+
Binôme: nom_candidature
|
| 180 |
+
Blancs: blancs
|
| 181 |
+
Exprimés: exprimes
|
| 182 |
+
Inscrits: inscrits
|
| 183 |
+
Nuance: code_candidature
|
| 184 |
+
Nuls: nuls
|
| 185 |
+
Voix: voix
|
| 186 |
+
Votants: votants
|
| 187 |
+
tour: 2
|
| 188 |
+
type_scrutin: departementales
|
| 189 |
+
21_REG_T1.csv:
|
| 190 |
+
code_bv_cols:
|
| 191 |
+
- Code de la commune
|
| 192 |
+
- Code du b.vote
|
| 193 |
+
date_scrutin: '2021-06-20'
|
| 194 |
+
rename_map:
|
| 195 |
+
Abstentions: abstentions
|
| 196 |
+
Blancs: blancs
|
| 197 |
+
Exprimés: exprimes
|
| 198 |
+
Inscrits: inscrits
|
| 199 |
+
Libellé Abrégé Liste: nom_candidature
|
| 200 |
+
Nuance Liste: code_candidature
|
| 201 |
+
Nuls: nuls
|
| 202 |
+
Voix: voix
|
| 203 |
+
Votants: votants
|
| 204 |
+
tour: 1
|
| 205 |
+
type_scrutin: regionales
|
| 206 |
+
21_REG_T2.csv:
|
| 207 |
+
code_bv_cols:
|
| 208 |
+
- Code de la commune
|
| 209 |
+
- Code du b.vote
|
| 210 |
+
date_scrutin: '2021-06-27'
|
| 211 |
+
rename_map:
|
| 212 |
+
Abstentions: abstentions
|
| 213 |
+
Blancs: blancs
|
| 214 |
+
Exprimés: exprimes
|
| 215 |
+
Inscrits: inscrits
|
| 216 |
+
Libellé Abrégé Liste: nom_candidature
|
| 217 |
+
Nuance Liste: code_candidature
|
| 218 |
+
Nuls: nuls
|
| 219 |
+
Voix: voix
|
| 220 |
+
Votants: votants
|
| 221 |
+
tour: 2
|
| 222 |
+
type_scrutin: regionales
|
| 223 |
+
22_L_T1.csv:
|
| 224 |
+
code_bv_cols:
|
| 225 |
+
- Code de la commune
|
| 226 |
+
- Code du b.vote
|
| 227 |
+
date_scrutin: '2022-06-12'
|
| 228 |
+
rename_map:
|
| 229 |
+
Abstentions: abstentions
|
| 230 |
+
Blancs: blancs
|
| 231 |
+
Exprimés: exprimes
|
| 232 |
+
Inscrits: inscrits
|
| 233 |
+
Nom: nom_candidature
|
| 234 |
+
Nuance: code_candidature
|
| 235 |
+
Nuls: nuls
|
| 236 |
+
Voix: voix
|
| 237 |
+
Votants: votants
|
| 238 |
+
tour: 1
|
| 239 |
+
type_scrutin: legislatives
|
| 240 |
+
22_L_T2.csv:
|
| 241 |
+
code_bv_cols:
|
| 242 |
+
- Code de la commune
|
| 243 |
+
- Code du b.vote
|
| 244 |
+
date_scrutin: '2022-06-19'
|
| 245 |
+
rename_map:
|
| 246 |
+
Abstentions: abstentions
|
| 247 |
+
Blancs: blancs
|
| 248 |
+
Exprimés: exprimes
|
| 249 |
+
Inscrits: inscrits
|
| 250 |
+
Nom: nom_candidature
|
| 251 |
+
Nuance: code_candidature
|
| 252 |
+
Nuls: nuls
|
| 253 |
+
Voix: voix
|
| 254 |
+
Votants: votants
|
| 255 |
+
tour: 2
|
| 256 |
+
type_scrutin: legislatives
|
| 257 |
+
22_PR_T1.csv:
|
| 258 |
+
code_bv_cols:
|
| 259 |
+
- Code de la commune
|
| 260 |
+
- Code du b.vote
|
| 261 |
+
date_scrutin: '2022-04-10'
|
| 262 |
+
rename_map:
|
| 263 |
+
Abstentions: abstentions
|
| 264 |
+
Blancs: blancs
|
| 265 |
+
Code nuance du candidat: code_candidature
|
| 266 |
+
Exprimés: exprimes
|
| 267 |
+
Inscrits: inscrits
|
| 268 |
+
Nom: nom_candidature
|
| 269 |
+
Nuls: nuls
|
| 270 |
+
Voix: voix
|
| 271 |
+
Votants: votants
|
| 272 |
+
tour: 1
|
| 273 |
+
type_scrutin: presidentielles
|
| 274 |
+
22_PR_T2.csv:
|
| 275 |
+
code_bv_cols:
|
| 276 |
+
- Code de la commune
|
| 277 |
+
- Code du b.vote
|
| 278 |
+
date_scrutin: '2022-04-24'
|
| 279 |
+
rename_map:
|
| 280 |
+
Abstentions: abstentions
|
| 281 |
+
Blancs: blancs
|
| 282 |
+
Code nuance du candidat: code_candidature
|
| 283 |
+
Exprimés: exprimes
|
| 284 |
+
Inscrits: inscrits
|
| 285 |
+
Nom: nom_candidature
|
| 286 |
+
Nuls: nuls
|
| 287 |
+
Voix: voix
|
| 288 |
+
Votants: votants
|
| 289 |
+
tour: 2
|
| 290 |
+
type_scrutin: presidentielles
|
| 291 |
+
24_EU.csv:
|
| 292 |
+
code_bv_cols:
|
| 293 |
+
- Code commune
|
| 294 |
+
- Code BV
|
| 295 |
+
date_scrutin: '2024-06-09'
|
| 296 |
+
rename_map:
|
| 297 |
+
Abstentions: abstentions
|
| 298 |
+
Blancs: blancs
|
| 299 |
+
Exprimés: exprimes
|
| 300 |
+
Inscrits: inscrits
|
| 301 |
+
Libellé abrégé de liste 1: nom_candidature
|
| 302 |
+
Nuance liste 1: code_candidature
|
| 303 |
+
Nuls: nuls
|
| 304 |
+
Voix: voix
|
| 305 |
+
Voix 1: voix
|
| 306 |
+
Votants: votants
|
| 307 |
+
tour: 1
|
| 308 |
+
type_scrutin: europeennes
|
| 309 |
+
24_L_T1.csv:
|
| 310 |
+
code_bv_cols:
|
| 311 |
+
- Code commune
|
| 312 |
+
- Code BV
|
| 313 |
+
date_scrutin: '2024-06-30'
|
| 314 |
+
rename_map:
|
| 315 |
+
Abstentions: abstentions
|
| 316 |
+
Binôme: nom_candidature
|
| 317 |
+
Blancs: blancs
|
| 318 |
+
Exprimés: exprimes
|
| 319 |
+
Inscrits: inscrits
|
| 320 |
+
Libellé Abrégé Liste: nom_candidature
|
| 321 |
+
Nuance Liste: code_candidature
|
| 322 |
+
Nuls: nuls
|
| 323 |
+
Voix: voix
|
| 324 |
+
Votants: votants
|
| 325 |
+
tour: 1
|
| 326 |
+
type_scrutin: legislatives
|
| 327 |
+
24_L_T2.csv:
|
| 328 |
+
code_bv_cols:
|
| 329 |
+
- Code commune
|
| 330 |
+
- Code BV
|
| 331 |
+
date_scrutin: '2024-07-07'
|
| 332 |
+
rename_map:
|
| 333 |
+
Abstentions: abstentions
|
| 334 |
+
Binôme: nom_candidature
|
| 335 |
+
Blancs: blancs
|
| 336 |
+
Exprimés: exprimes
|
| 337 |
+
Inscrits: inscrits
|
| 338 |
+
Libellé Abrégé Liste: nom_candidature
|
| 339 |
+
Nuance Liste: code_candidature
|
| 340 |
+
Nuls: nuls
|
| 341 |
+
Voix: voix
|
| 342 |
+
Votants: votants
|
| 343 |
+
tour: 2
|
| 344 |
+
type_scrutin: legislatives
|
data/geo/bdv_s_te.geojson
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/geo/bdv_s_te.kml
ADDED
|
@@ -0,0 +1,1762 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<kml xmlns="http://www.opengis.net/kml/2.2"><Document>
|
| 2 |
+
<Placemark id="wC4Df">
|
| 3 |
+
<name>Bureau 01</name><ExtendedData></ExtendedData>
|
| 4 |
+
<Polygon>
|
| 5 |
+
<outerBoundaryIs>
|
| 6 |
+
<LinearRing><coordinates>3.701684,43.397393
|
| 7 |
+
3.697436,43.396753
|
| 8 |
+
3.697017,43.395943
|
| 9 |
+
3.697007,43.395428
|
| 10 |
+
3.695612,43.395241
|
| 11 |
+
3.695934,43.396504
|
| 12 |
+
3.696427,43.396473
|
| 13 |
+
3.696578,43.396831
|
| 14 |
+
3.696213,43.397159
|
| 15 |
+
3.696964,43.397128
|
| 16 |
+
3.697157,43.39751
|
| 17 |
+
3.697157,43.398079
|
| 18 |
+
3.696975,43.398211
|
| 19 |
+
3.696985,43.398858
|
| 20 |
+
3.696771,43.399809
|
| 21 |
+
3.697093,43.401649
|
| 22 |
+
3.695955,43.40182
|
| 23 |
+
3.694453,43.401555
|
| 24 |
+
3.694775,43.401384
|
| 25 |
+
3.694743,43.401189
|
| 26 |
+
3.695129,43.401212
|
| 27 |
+
3.695118,43.401064
|
| 28 |
+
3.695505,43.40108
|
| 29 |
+
3.69573,43.400955
|
| 30 |
+
3.695419,43.400745
|
| 31 |
+
3.69543,43.400441
|
| 32 |
+
3.695033,43.400277
|
| 33 |
+
3.69514,43.400129
|
| 34 |
+
3.695312,43.39995
|
| 35 |
+
3.695129,43.399833
|
| 36 |
+
3.695397,43.399669
|
| 37 |
+
3.69514,43.399279
|
| 38 |
+
3.694839,43.399295
|
| 39 |
+
3.695022,43.398437
|
| 40 |
+
3.695033,43.397611
|
| 41 |
+
3.69529,43.397564
|
| 42 |
+
3.695322,43.397736
|
| 43 |
+
3.695805,43.397704
|
| 44 |
+
3.695065,43.396091
|
| 45 |
+
3.69411,43.396247
|
| 46 |
+
3.69293,43.394867
|
| 47 |
+
3.695312,43.394914
|
| 48 |
+
3.699818,43.395522
|
| 49 |
+
3.700311,43.395319
|
| 50 |
+
3.700504,43.394929
|
| 51 |
+
3.701942,43.394134
|
| 52 |
+
3.702006,43.394399
|
| 53 |
+
3.700676,43.395132
|
| 54 |
+
3.702328,43.395257
|
| 55 |
+
3.702822,43.39574
|
| 56 |
+
3.702672,43.397112
|
| 57 |
+
3.701684,43.397393</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 58 |
+
<Placemark id="S9B2Q">
|
| 59 |
+
<name>Bureau 2</name><ExtendedData>
|
| 60 |
+
<Data name="_umap_options"><value>{"color":"MediumOrchid"}</value></Data></ExtendedData>
|
| 61 |
+
<Polygon>
|
| 62 |
+
<outerBoundaryIs>
|
| 63 |
+
<LinearRing><coordinates>3.694561,43.402506
|
| 64 |
+
3.694389,43.402514
|
| 65 |
+
3.694282,43.402569
|
| 66 |
+
3.693911,43.402456
|
| 67 |
+
3.694072,43.401622
|
| 68 |
+
3.692425,43.401626
|
| 69 |
+
3.691535,43.402896
|
| 70 |
+
3.691181,43.40281
|
| 71 |
+
3.691063,43.402974
|
| 72 |
+
3.690355,43.402849
|
| 73 |
+
3.690001,43.403325
|
| 74 |
+
3.689657,43.403301
|
| 75 |
+
3.689352,43.403804
|
| 76 |
+
3.688483,43.403644
|
| 77 |
+
3.688343,43.403906
|
| 78 |
+
3.689228,43.404089
|
| 79 |
+
3.689132,43.40426
|
| 80 |
+
3.690258,43.404455
|
| 81 |
+
3.690054,43.404615
|
| 82 |
+
3.689856,43.404747
|
| 83 |
+
3.690081,43.404779
|
| 84 |
+
3.690382,43.40451
|
| 85 |
+
3.69072,43.404701
|
| 86 |
+
3.692297,43.404829
|
| 87 |
+
3.692393,43.404915
|
| 88 |
+
3.692688,43.404541
|
| 89 |
+
3.693901,43.405055
|
| 90 |
+
3.694416,43.405149
|
| 91 |
+
3.694142,43.405036
|
| 92 |
+
3.694287,43.403995
|
| 93 |
+
3.694561,43.402506</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 94 |
+
<Placemark id="oC7F2">
|
| 95 |
+
<name>Bureau 3</name><ExtendedData>
|
| 96 |
+
<Data name="_umap_options"><value>{"color":"Chartreuse"}</value></Data></ExtendedData>
|
| 97 |
+
<Polygon>
|
| 98 |
+
<outerBoundaryIs>
|
| 99 |
+
<LinearRing><coordinates>3.69765,43.401778
|
| 100 |
+
3.698037,43.401785
|
| 101 |
+
3.697999,43.401937
|
| 102 |
+
3.698251,43.401941
|
| 103 |
+
3.698257,43.401817
|
| 104 |
+
3.698745,43.401844
|
| 105 |
+
3.698879,43.401922
|
| 106 |
+
3.699678,43.401945
|
| 107 |
+
3.699753,43.401867
|
| 108 |
+
3.700097,43.401859
|
| 109 |
+
3.700118,43.400347
|
| 110 |
+
3.701695,43.400238
|
| 111 |
+
3.701996,43.399685
|
| 112 |
+
3.701878,43.399575
|
| 113 |
+
3.701695,43.39963
|
| 114 |
+
3.699024,43.397494
|
| 115 |
+
3.697704,43.397619
|
| 116 |
+
3.69765,43.401778</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 117 |
+
<Placemark id="iUben">
|
| 118 |
+
<name>Bureau 5</name><ExtendedData>
|
| 119 |
+
<Data name="_umap_options"><value>{"color":"LightSkyBlue"}</value></Data></ExtendedData>
|
| 120 |
+
<Polygon>
|
| 121 |
+
<outerBoundaryIs>
|
| 122 |
+
<LinearRing><coordinates>3.696084,43.406825
|
| 123 |
+
3.69338,43.40656
|
| 124 |
+
3.69352,43.405079
|
| 125 |
+
3.694432,43.405149
|
| 126 |
+
3.694142,43.40504
|
| 127 |
+
3.694282,43.404073
|
| 128 |
+
3.694571,43.402522
|
| 129 |
+
3.694421,43.402514
|
| 130 |
+
3.694271,43.402577
|
| 131 |
+
3.693917,43.40246
|
| 132 |
+
3.694067,43.401637
|
| 133 |
+
3.694464,43.401583
|
| 134 |
+
3.694689,43.40168
|
| 135 |
+
3.695419,43.401727
|
| 136 |
+
3.695698,43.401668
|
| 137 |
+
3.695891,43.40182
|
| 138 |
+
3.696631,43.401828
|
| 139 |
+
3.696685,43.401672
|
| 140 |
+
3.697082,43.40168
|
| 141 |
+
3.697168,43.402179
|
| 142 |
+
3.696524,43.405313
|
| 143 |
+
3.696084,43.406825</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 144 |
+
<Placemark id="rI3Wb">
|
| 145 |
+
<name>Bureau 6</name><ExtendedData></ExtendedData>
|
| 146 |
+
<Polygon>
|
| 147 |
+
<outerBoundaryIs>
|
| 148 |
+
<LinearRing><coordinates>3.690253,43.404459
|
| 149 |
+
3.689126,43.40426
|
| 150 |
+
3.689228,43.404081
|
| 151 |
+
3.688332,43.403909
|
| 152 |
+
3.687989,43.404354
|
| 153 |
+
3.689298,43.405059
|
| 154 |
+
3.689309,43.405164
|
| 155 |
+
3.689239,43.405671
|
| 156 |
+
3.68866,43.405605
|
| 157 |
+
3.688102,43.406045
|
| 158 |
+
3.693359,43.406552
|
| 159 |
+
3.693531,43.405063
|
| 160 |
+
3.69441,43.405157
|
| 161 |
+
3.693895,43.405055
|
| 162 |
+
3.692704,43.404549
|
| 163 |
+
3.692372,43.404915
|
| 164 |
+
3.692297,43.404829
|
| 165 |
+
3.690687,43.404712
|
| 166 |
+
3.690408,43.404517
|
| 167 |
+
3.690076,43.404782
|
| 168 |
+
3.689851,43.404751
|
| 169 |
+
3.690253,43.404459</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 170 |
+
<Placemark id="AaY2i">
|
| 171 |
+
<name>Bureau 7</name><ExtendedData>
|
| 172 |
+
<Data name="_umap_options"><value>{"color":"Chartreuse"}</value></Data></ExtendedData>
|
| 173 |
+
<Polygon>
|
| 174 |
+
<outerBoundaryIs>
|
| 175 |
+
<LinearRing><coordinates>3.695998,43.408056
|
| 176 |
+
3.69514,43.409366
|
| 177 |
+
3.694577,43.410207
|
| 178 |
+
3.694679,43.410324
|
| 179 |
+
3.694968,43.410262
|
| 180 |
+
3.694936,43.41034
|
| 181 |
+
3.696814,43.410659
|
| 182 |
+
3.69705,43.410846
|
| 183 |
+
3.69735,43.410901
|
| 184 |
+
3.69765,43.410823
|
| 185 |
+
3.697758,43.410496
|
| 186 |
+
3.697565,43.410262
|
| 187 |
+
3.697629,43.410083
|
| 188 |
+
3.697876,43.410083
|
| 189 |
+
3.697715,43.409864
|
| 190 |
+
3.697876,43.409405
|
| 191 |
+
3.698101,43.409444
|
| 192 |
+
3.698133,43.409311
|
| 193 |
+
3.697972,43.409272
|
| 194 |
+
3.698047,43.409085
|
| 195 |
+
3.698262,43.409023
|
| 196 |
+
3.698326,43.408898
|
| 197 |
+
3.698616,43.408945
|
| 198 |
+
3.698659,43.408867
|
| 199 |
+
3.698809,43.408882
|
| 200 |
+
3.699163,43.408906
|
| 201 |
+
3.700461,43.407573
|
| 202 |
+
3.696985,43.405967
|
| 203 |
+
3.695998,43.408056</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 204 |
+
<Placemark id="aZn3x">
|
| 205 |
+
<name>Bureau 8</name><ExtendedData></ExtendedData>
|
| 206 |
+
<Polygon>
|
| 207 |
+
<outerBoundaryIs>
|
| 208 |
+
<LinearRing><coordinates>3.685505,43.400745
|
| 209 |
+
3.683478,43.402506
|
| 210 |
+
3.683617,43.402978
|
| 211 |
+
3.682657,43.403387
|
| 212 |
+
3.682415,43.402994
|
| 213 |
+
3.68226,43.403029
|
| 214 |
+
3.682512,43.403434
|
| 215 |
+
3.6816,43.403754
|
| 216 |
+
3.67954,43.404178
|
| 217 |
+
3.67924,43.403952
|
| 218 |
+
3.679261,43.404323
|
| 219 |
+
3.679036,43.404393
|
| 220 |
+
3.67895,43.40428
|
| 221 |
+
3.678735,43.404369
|
| 222 |
+
3.678371,43.404159
|
| 223 |
+
3.67468,43.404291
|
| 224 |
+
3.674455,43.403941
|
| 225 |
+
3.674412,43.404307
|
| 226 |
+
3.673221,43.404299
|
| 227 |
+
3.673344,43.403929
|
| 228 |
+
3.67262,43.403793
|
| 229 |
+
3.672566,43.403964
|
| 230 |
+
3.673135,43.404065
|
| 231 |
+
3.673028,43.404354
|
| 232 |
+
3.670903,43.404058
|
| 233 |
+
3.670158,43.40403
|
| 234 |
+
3.67019,43.403652
|
| 235 |
+
3.670624,43.403625
|
| 236 |
+
3.67122,43.403711
|
| 237 |
+
3.671177,43.402997
|
| 238 |
+
3.671982,43.403052
|
| 239 |
+
3.672041,43.402464
|
| 240 |
+
3.670802,43.402389
|
| 241 |
+
3.67085,43.402085
|
| 242 |
+
3.669734,43.401992
|
| 243 |
+
3.669605,43.400675
|
| 244 |
+
3.669884,43.400604
|
| 245 |
+
3.669949,43.400199
|
| 246 |
+
3.669648,43.400168
|
| 247 |
+
3.669595,43.399895
|
| 248 |
+
3.669412,43.399778
|
| 249 |
+
3.669434,43.398866
|
| 250 |
+
3.671139,43.398889
|
| 251 |
+
3.671805,43.399022
|
| 252 |
+
3.672631,43.398788
|
| 253 |
+
3.672684,43.399178
|
| 254 |
+
3.673489,43.399076
|
| 255 |
+
3.673553,43.398897
|
| 256 |
+
3.674058,43.399193
|
| 257 |
+
3.674326,43.3991
|
| 258 |
+
3.674841,43.398593
|
| 259 |
+
3.67424,43.397502
|
| 260 |
+
3.67969,43.396722
|
| 261 |
+
3.67939,43.395849
|
| 262 |
+
3.68042,43.395849
|
| 263 |
+
3.680334,43.395553
|
| 264 |
+
3.680634,43.395038
|
| 265 |
+
3.681214,43.395272
|
| 266 |
+
3.681922,43.39507
|
| 267 |
+
3.682179,43.39549
|
| 268 |
+
3.684711,43.395179
|
| 269 |
+
3.687222,43.394602
|
| 270 |
+
3.686106,43.39507
|
| 271 |
+
3.68602,43.395865
|
| 272 |
+
3.686557,43.396582
|
| 273 |
+
3.687694,43.396722
|
| 274 |
+
3.684068,43.398858
|
| 275 |
+
3.683789,43.399373
|
| 276 |
+
3.683295,43.399326
|
| 277 |
+
3.682973,43.398593
|
| 278 |
+
3.682888,43.399232
|
| 279 |
+
3.680592,43.39903
|
| 280 |
+
3.680506,43.399731
|
| 281 |
+
3.680849,43.400261
|
| 282 |
+
3.679454,43.400932
|
| 283 |
+
3.679519,43.401399
|
| 284 |
+
3.682652,43.400994
|
| 285 |
+
3.685355,43.400355
|
| 286 |
+
3.686407,43.400542
|
| 287 |
+
3.68602,43.400916
|
| 288 |
+
3.685505,43.400745</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 289 |
+
<Placemark id="Lvrgc">
|
| 290 |
+
<name>Bureau 9</name><ExtendedData></ExtendedData>
|
| 291 |
+
<Polygon>
|
| 292 |
+
<outerBoundaryIs>
|
| 293 |
+
<LinearRing><coordinates>3.676193,43.394968
|
| 294 |
+
3.676021,43.394929
|
| 295 |
+
3.675334,43.395132
|
| 296 |
+
3.674229,43.395452
|
| 297 |
+
3.674304,43.395701
|
| 298 |
+
3.6738,43.395631
|
| 299 |
+
3.674004,43.395888
|
| 300 |
+
3.672953,43.396738
|
| 301 |
+
3.672684,43.396161
|
| 302 |
+
3.671644,43.396473
|
| 303 |
+
3.671494,43.395997
|
| 304 |
+
3.671311,43.396091
|
| 305 |
+
3.671064,43.395834
|
| 306 |
+
3.670957,43.395841
|
| 307 |
+
3.671536,43.396496
|
| 308 |
+
3.670045,43.397034
|
| 309 |
+
3.669294,43.396496
|
| 310 |
+
3.669595,43.396356
|
| 311 |
+
3.669455,43.396138
|
| 312 |
+
3.668704,43.396262
|
| 313 |
+
3.669133,43.397284
|
| 314 |
+
3.668039,43.397517
|
| 315 |
+
3.668189,43.397782
|
| 316 |
+
3.667835,43.397814
|
| 317 |
+
3.667309,43.396839
|
| 318 |
+
3.667084,43.396964
|
| 319 |
+
3.66688,43.396785
|
| 320 |
+
3.665882,43.397268
|
| 321 |
+
3.665518,43.397439
|
| 322 |
+
3.665088,43.397018
|
| 323 |
+
3.66408,43.397533
|
| 324 |
+
3.663715,43.397065
|
| 325 |
+
3.663501,43.397455
|
| 326 |
+
3.66408,43.398196
|
| 327 |
+
3.662975,43.398889
|
| 328 |
+
3.661838,43.397829
|
| 329 |
+
3.661623,43.397892
|
| 330 |
+
3.661087,43.397829
|
| 331 |
+
3.661344,43.397252
|
| 332 |
+
3.660979,43.3968
|
| 333 |
+
3.659177,43.395927
|
| 334 |
+
3.659177,43.395771
|
| 335 |
+
3.658276,43.395319
|
| 336 |
+
3.658426,43.394882
|
| 337 |
+
3.659134,43.395163
|
| 338 |
+
3.660293,43.393962
|
| 339 |
+
3.659456,43.393651
|
| 340 |
+
3.659756,43.39337
|
| 341 |
+
3.659155,43.393074
|
| 342 |
+
3.659306,43.392653
|
| 343 |
+
3.660915,43.393292
|
| 344 |
+
3.662825,43.392559
|
| 345 |
+
3.663275,43.392606
|
| 346 |
+
3.664262,43.39178
|
| 347 |
+
3.664606,43.392622
|
| 348 |
+
3.664241,43.393011
|
| 349 |
+
3.665164,43.393292
|
| 350 |
+
3.666129,43.393027
|
| 351 |
+
3.666472,43.392762
|
| 352 |
+
3.668532,43.39284
|
| 353 |
+
3.669326,43.393697
|
| 354 |
+
3.670056,43.393994
|
| 355 |
+
3.671853,43.393452
|
| 356 |
+
3.672304,43.393204
|
| 357 |
+
3.672692,43.392871
|
| 358 |
+
3.672816,43.392707
|
| 359 |
+
3.672907,43.392353
|
| 360 |
+
3.672888,43.392261
|
| 361 |
+
3.67277,43.392039
|
| 362 |
+
3.672532,43.391752
|
| 363 |
+
3.671815,43.391998
|
| 364 |
+
3.671687,43.391889
|
| 365 |
+
3.673041,43.391458
|
| 366 |
+
3.673334,43.391322
|
| 367 |
+
3.673449,43.391191
|
| 368 |
+
3.673315,43.391054
|
| 369 |
+
3.673406,43.390916
|
| 370 |
+
3.673497,43.390899
|
| 371 |
+
3.673462,43.39083
|
| 372 |
+
3.673435,43.390758
|
| 373 |
+
3.673468,43.390708
|
| 374 |
+
3.673588,43.39069
|
| 375 |
+
3.673567,43.390649
|
| 376 |
+
3.673709,43.390614
|
| 377 |
+
3.6738,43.390743
|
| 378 |
+
3.673972,43.390702
|
| 379 |
+
3.67409,43.39069
|
| 380 |
+
3.67409,43.390836
|
| 381 |
+
3.674176,43.390797
|
| 382 |
+
3.674444,43.390821
|
| 383 |
+
3.6745,43.390863
|
| 384 |
+
3.674503,43.390978
|
| 385 |
+
3.674567,43.390965
|
| 386 |
+
3.674543,43.390733
|
| 387 |
+
3.674626,43.390719
|
| 388 |
+
3.674669,43.391678
|
| 389 |
+
3.67542,43.392146
|
| 390 |
+
3.674766,43.392629
|
| 391 |
+
3.675013,43.392793
|
| 392 |
+
3.676311,43.393619
|
| 393 |
+
3.67718,43.394586
|
| 394 |
+
3.676193,43.394968</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 395 |
+
<Placemark id="ouLu8">
|
| 396 |
+
<name>Bureau 10</name><ExtendedData>
|
| 397 |
+
<Data name="_umap_options"><value>{"color":"DodgerBlue"}</value></Data></ExtendedData>
|
| 398 |
+
<Polygon>
|
| 399 |
+
<outerBoundaryIs>
|
| 400 |
+
<LinearRing><coordinates>3.699142,43.406349
|
| 401 |
+
3.699667,43.405687
|
| 402 |
+
3.699721,43.404588
|
| 403 |
+
3.698401,43.404401
|
| 404 |
+
3.698766,43.404237
|
| 405 |
+
3.698841,43.404237
|
| 406 |
+
3.698906,43.404104
|
| 407 |
+
3.699389,43.40412
|
| 408 |
+
3.699346,43.403691
|
| 409 |
+
3.699238,43.403652
|
| 410 |
+
3.699249,43.403598
|
| 411 |
+
3.699378,43.40359
|
| 412 |
+
3.699378,43.403504
|
| 413 |
+
3.69912,43.403465
|
| 414 |
+
3.699249,43.403247
|
| 415 |
+
3.699431,43.403177
|
| 416 |
+
3.699528,43.403192
|
| 417 |
+
3.699517,43.403372
|
| 418 |
+
3.699667,43.403403
|
| 419 |
+
3.699678,43.403356
|
| 420 |
+
3.699979,43.403496
|
| 421 |
+
3.699946,43.403637
|
| 422 |
+
3.699957,43.403754
|
| 423 |
+
3.699818,43.403878
|
| 424 |
+
3.699946,43.403909
|
| 425 |
+
3.699946,43.403987
|
| 426 |
+
3.700075,43.404081
|
| 427 |
+
3.700161,43.404026
|
| 428 |
+
3.700966,43.404447
|
| 429 |
+
3.701234,43.40451
|
| 430 |
+
3.702028,43.404564
|
| 431 |
+
3.702135,43.402678
|
| 432 |
+
3.70235,43.402709
|
| 433 |
+
3.704849,43.404619
|
| 434 |
+
3.705676,43.404112
|
| 435 |
+
3.702757,43.401462
|
| 436 |
+
3.702307,43.400869
|
| 437 |
+
3.703766,43.398468
|
| 438 |
+
3.706942,43.397876
|
| 439 |
+
3.708701,43.398905
|
| 440 |
+
3.707199,43.401867
|
| 441 |
+
3.7081,43.402304
|
| 442 |
+
3.711233,43.39825
|
| 443 |
+
3.714023,43.398344
|
| 444 |
+
3.715096,43.398811
|
| 445 |
+
3.716512,43.400776
|
| 446 |
+
3.723121,43.406482
|
| 447 |
+
3.725696,43.407168
|
| 448 |
+
3.725395,43.413185
|
| 449 |
+
3.726854,43.413808
|
| 450 |
+
3.724966,43.413933
|
| 451 |
+
3.725309,43.416115
|
| 452 |
+
3.724751,43.418016
|
| 453 |
+
3.707671,43.410441
|
| 454 |
+
3.699142,43.406349</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 455 |
+
<Placemark id="043I7">
|
| 456 |
+
<name>Bureau 11</name><ExtendedData></ExtendedData>
|
| 457 |
+
<Polygon>
|
| 458 |
+
<outerBoundaryIs>
|
| 459 |
+
<LinearRing><coordinates>3.695483,43.401092
|
| 460 |
+
3.695124,43.401068
|
| 461 |
+
3.695092,43.401205
|
| 462 |
+
3.694743,43.401197
|
| 463 |
+
3.694764,43.401392
|
| 464 |
+
3.694443,43.401555
|
| 465 |
+
3.694099,43.401629
|
| 466 |
+
3.692431,43.401618
|
| 467 |
+
3.691556,43.402896
|
| 468 |
+
3.691245,43.402787
|
| 469 |
+
3.690977,43.402974
|
| 470 |
+
3.690248,43.402865
|
| 471 |
+
3.689969,43.403364
|
| 472 |
+
3.689625,43.403255
|
| 473 |
+
3.689303,43.403707
|
| 474 |
+
3.688467,43.40366
|
| 475 |
+
3.688338,43.403863
|
| 476 |
+
3.687544,43.403551
|
| 477 |
+
3.688037,43.402943
|
| 478 |
+
3.687673,43.402896
|
| 479 |
+
3.686063,43.403551
|
| 480 |
+
3.685784,43.403925
|
| 481 |
+
3.684626,43.403644
|
| 482 |
+
3.685699,43.402428
|
| 483 |
+
3.684497,43.403551
|
| 484 |
+
3.683767,43.403738
|
| 485 |
+
3.683488,43.402475
|
| 486 |
+
3.685548,43.40076
|
| 487 |
+
3.685956,43.400947
|
| 488 |
+
3.686407,43.400526
|
| 489 |
+
3.685398,43.400371
|
| 490 |
+
3.684475,43.400573
|
| 491 |
+
3.682737,43.400963
|
| 492 |
+
3.679605,43.401337
|
| 493 |
+
3.679454,43.400994
|
| 494 |
+
3.680892,43.400261
|
| 495 |
+
3.68057,43.3997
|
| 496 |
+
3.68057,43.399045
|
| 497 |
+
3.68293,43.399232
|
| 498 |
+
3.682952,43.398562
|
| 499 |
+
3.683274,43.399342
|
| 500 |
+
3.683875,43.399373
|
| 501 |
+
3.684111,43.398858
|
| 502 |
+
3.686643,43.397471
|
| 503 |
+
3.687651,43.396722
|
| 504 |
+
3.687201,43.396644
|
| 505 |
+
3.686578,43.396551
|
| 506 |
+
3.685999,43.395865
|
| 507 |
+
3.686106,43.395116
|
| 508 |
+
3.687136,43.394695
|
| 509 |
+
3.687737,43.394539
|
| 510 |
+
3.687823,43.393541
|
| 511 |
+
3.688311,43.393113
|
| 512 |
+
3.689121,43.393323
|
| 513 |
+
3.689057,43.394446
|
| 514 |
+
3.689362,43.394485
|
| 515 |
+
3.689867,43.394173
|
| 516 |
+
3.6897,43.394064
|
| 517 |
+
3.68992,43.393284
|
| 518 |
+
3.690779,43.393514
|
| 519 |
+
3.690918,43.393354
|
| 520 |
+
3.691503,43.393666
|
| 521 |
+
3.691396,43.393791
|
| 522 |
+
3.692029,43.393955
|
| 523 |
+
3.692586,43.394765
|
| 524 |
+
3.69323,43.395467
|
| 525 |
+
3.69337,43.395381
|
| 526 |
+
3.694115,43.396239
|
| 527 |
+
3.695049,43.396083
|
| 528 |
+
3.6958,43.397689
|
| 529 |
+
3.695328,43.397736
|
| 530 |
+
3.695285,43.397556
|
| 531 |
+
3.695033,43.397615
|
| 532 |
+
3.695022,43.398461
|
| 533 |
+
3.694839,43.399318
|
| 534 |
+
3.695129,43.399279
|
| 535 |
+
3.695354,43.399685
|
| 536 |
+
3.695124,43.399829
|
| 537 |
+
3.695301,43.399957
|
| 538 |
+
3.695033,43.400269
|
| 539 |
+
3.69543,43.400448
|
| 540 |
+
3.69543,43.40076
|
| 541 |
+
3.695703,43.400963
|
| 542 |
+
3.695483,43.401092</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 543 |
+
<Placemark id="YpoWc">
|
| 544 |
+
<name>Bureau 12</name><ExtendedData>
|
| 545 |
+
<Data name="_umap_options"><value>{"color":"Orchid"}</value></Data></ExtendedData>
|
| 546 |
+
<Polygon>
|
| 547 |
+
<outerBoundaryIs>
|
| 548 |
+
<LinearRing><coordinates>3.698788,43.417642
|
| 549 |
+
3.694496,43.415242
|
| 550 |
+
3.696127,43.413964
|
| 551 |
+
3.694046,43.41352
|
| 552 |
+
3.69308,43.41327
|
| 553 |
+
3.69279,43.412849
|
| 554 |
+
3.692715,43.412164
|
| 555 |
+
3.693123,43.411735
|
| 556 |
+
3.694153,43.411501
|
| 557 |
+
3.694679,43.410348
|
| 558 |
+
3.694957,43.41027
|
| 559 |
+
3.694968,43.410355
|
| 560 |
+
3.696781,43.410636
|
| 561 |
+
3.697039,43.410823
|
| 562 |
+
3.697318,43.410917
|
| 563 |
+
3.697661,43.410815
|
| 564 |
+
3.697758,43.410488
|
| 565 |
+
3.697565,43.410277
|
| 566 |
+
3.697608,43.410106
|
| 567 |
+
3.697886,43.410075
|
| 568 |
+
3.697715,43.409872
|
| 569 |
+
3.697876,43.409428
|
| 570 |
+
3.69808,43.409451
|
| 571 |
+
3.698123,43.409334
|
| 572 |
+
3.697994,43.409264
|
| 573 |
+
3.698037,43.409089
|
| 574 |
+
3.698246,43.409027
|
| 575 |
+
3.698326,43.40891
|
| 576 |
+
3.698595,43.408945
|
| 577 |
+
3.698654,43.408871
|
| 578 |
+
3.699169,43.408906
|
| 579 |
+
3.700461,43.407569
|
| 580 |
+
3.723378,43.417892
|
| 581 |
+
3.722091,43.4221
|
| 582 |
+
3.708315,43.423004
|
| 583 |
+
3.706555,43.423066
|
| 584 |
+
3.706555,43.422037
|
| 585 |
+
3.705482,43.42185
|
| 586 |
+
3.703079,43.422006
|
| 587 |
+
3.698788,43.417642</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 588 |
+
<Placemark id="6iuwV">
|
| 589 |
+
<name>Bureau 13</name><ExtendedData></ExtendedData>
|
| 590 |
+
<Polygon>
|
| 591 |
+
<outerBoundaryIs>
|
| 592 |
+
<LinearRing><coordinates>3.699099,43.406365
|
| 593 |
+
3.699657,43.405687
|
| 594 |
+
3.69971,43.404564
|
| 595 |
+
3.698391,43.404401
|
| 596 |
+
3.698777,43.404221
|
| 597 |
+
3.698852,43.404221
|
| 598 |
+
3.698895,43.404081
|
| 599 |
+
3.699389,43.40412
|
| 600 |
+
3.699356,43.403691
|
| 601 |
+
3.699238,43.40366
|
| 602 |
+
3.699249,43.403598
|
| 603 |
+
3.699356,43.403598
|
| 604 |
+
3.699378,43.403496
|
| 605 |
+
3.69911,43.403457
|
| 606 |
+
3.699238,43.403239
|
| 607 |
+
3.699442,43.403169
|
| 608 |
+
3.699528,43.4032
|
| 609 |
+
3.699528,43.403372
|
| 610 |
+
3.699678,43.403395
|
| 611 |
+
3.699678,43.403325
|
| 612 |
+
3.699968,43.403473
|
| 613 |
+
3.699946,43.403629
|
| 614 |
+
3.699989,43.403738
|
| 615 |
+
3.699839,43.403863
|
| 616 |
+
3.699968,43.403878
|
| 617 |
+
3.699946,43.403964
|
| 618 |
+
3.700086,43.404065
|
| 619 |
+
3.700172,43.404011
|
| 620 |
+
3.700279,43.404065
|
| 621 |
+
3.700998,43.40444
|
| 622 |
+
3.701245,43.404502
|
| 623 |
+
3.701341,43.402608
|
| 624 |
+
3.700118,43.402538
|
| 625 |
+
3.700129,43.401875
|
| 626 |
+
3.699764,43.401883
|
| 627 |
+
3.699678,43.401961
|
| 628 |
+
3.698895,43.401945
|
| 629 |
+
3.698756,43.401859
|
| 630 |
+
3.698294,43.401836
|
| 631 |
+
3.69823,43.401953
|
| 632 |
+
3.698015,43.401937
|
| 633 |
+
3.698026,43.40182
|
| 634 |
+
3.697715,43.401797
|
| 635 |
+
3.697017,43.405414
|
| 636 |
+
3.699099,43.406365</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 637 |
+
<Placemark id="MY3Lr">
|
| 638 |
+
<name>Bureau 14</name><ExtendedData></ExtendedData>
|
| 639 |
+
<Polygon>
|
| 640 |
+
<outerBoundaryIs>
|
| 641 |
+
<LinearRing><coordinates>3.676322,43.393604
|
| 642 |
+
3.674841,43.392653
|
| 643 |
+
3.675442,43.392169
|
| 644 |
+
3.674734,43.391639
|
| 645 |
+
3.674669,43.390797
|
| 646 |
+
3.675592,43.390782
|
| 647 |
+
3.677952,43.391156
|
| 648 |
+
3.677952,43.391795
|
| 649 |
+
3.68012,43.392793
|
| 650 |
+
3.681772,43.393214
|
| 651 |
+
3.682652,43.393386
|
| 652 |
+
3.683553,43.393339
|
| 653 |
+
3.684046,43.392809
|
| 654 |
+
3.684347,43.392193
|
| 655 |
+
3.684411,43.39238
|
| 656 |
+
3.685001,43.392372
|
| 657 |
+
3.685591,43.392473
|
| 658 |
+
3.685763,43.392325
|
| 659 |
+
3.686117,43.392216
|
| 660 |
+
3.686718,43.392341
|
| 661 |
+
3.688048,43.392317
|
| 662 |
+
3.688509,43.392169
|
| 663 |
+
3.688499,43.392271
|
| 664 |
+
3.690162,43.392224
|
| 665 |
+
3.690097,43.392076
|
| 666 |
+
3.691063,43.392045
|
| 667 |
+
3.6919,43.392068
|
| 668 |
+
3.691953,43.392403
|
| 669 |
+
3.692222,43.392505
|
| 670 |
+
3.692082,43.392575
|
| 671 |
+
3.692254,43.393019
|
| 672 |
+
3.692619,43.393269
|
| 673 |
+
3.692898,43.393245
|
| 674 |
+
3.692887,43.393362
|
| 675 |
+
3.693606,43.393861
|
| 676 |
+
3.693713,43.394165
|
| 677 |
+
3.694046,43.394196
|
| 678 |
+
3.694153,43.394344
|
| 679 |
+
3.694013,43.394555
|
| 680 |
+
3.694668,43.394695
|
| 681 |
+
3.695322,43.394914
|
| 682 |
+
3.692908,43.394851
|
| 683 |
+
3.693367,43.395387
|
| 684 |
+
3.693222,43.395457
|
| 685 |
+
3.692594,43.394785
|
| 686 |
+
3.692436,43.394526
|
| 687 |
+
3.692055,43.393945
|
| 688 |
+
3.691353,43.393791
|
| 689 |
+
3.691535,43.393682
|
| 690 |
+
3.690902,43.393347
|
| 691 |
+
3.690784,43.393503
|
| 692 |
+
3.689915,43.393276
|
| 693 |
+
3.6897,43.394064
|
| 694 |
+
3.689861,43.394165
|
| 695 |
+
3.689346,43.394477
|
| 696 |
+
3.689046,43.394446
|
| 697 |
+
3.689132,43.393323
|
| 698 |
+
3.688306,43.393105
|
| 699 |
+
3.687769,43.393565
|
| 700 |
+
3.687737,43.394485
|
| 701 |
+
3.685226,43.395046
|
| 702 |
+
3.682158,43.395522
|
| 703 |
+
3.681997,43.395085
|
| 704 |
+
3.681235,43.39528
|
| 705 |
+
3.680506,43.395093
|
| 706 |
+
3.680334,43.395506
|
| 707 |
+
3.680559,43.395724
|
| 708 |
+
3.68042,43.39588
|
| 709 |
+
3.67969,43.395763
|
| 710 |
+
3.679562,43.395607
|
| 711 |
+
3.679358,43.395678
|
| 712 |
+
3.679723,43.396605
|
| 713 |
+
3.679519,43.396847
|
| 714 |
+
3.677899,43.396941
|
| 715 |
+
3.677019,43.397284
|
| 716 |
+
3.676622,43.396379
|
| 717 |
+
3.675978,43.396535
|
| 718 |
+
3.675785,43.396161
|
| 719 |
+
3.676043,43.396005
|
| 720 |
+
3.675785,43.395709
|
| 721 |
+
3.676064,43.395506
|
| 722 |
+
3.675742,43.395023
|
| 723 |
+
3.676005,43.394933
|
| 724 |
+
3.676236,43.39496
|
| 725 |
+
3.677201,43.394598
|
| 726 |
+
3.676322,43.393604</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 727 |
+
<Placemark id="DZ7yf">
|
| 728 |
+
<name>Bureau 15</name><ExtendedData></ExtendedData>
|
| 729 |
+
<Polygon>
|
| 730 |
+
<outerBoundaryIs>
|
| 731 |
+
<LinearRing><coordinates>3.668125,43.401805
|
| 732 |
+
3.667738,43.401384
|
| 733 |
+
3.667331,43.401322
|
| 734 |
+
3.667052,43.401072
|
| 735 |
+
3.666987,43.400495
|
| 736 |
+
3.666344,43.400417
|
| 737 |
+
3.665721,43.400137
|
| 738 |
+
3.664992,43.400308
|
| 739 |
+
3.664412,43.3997
|
| 740 |
+
3.664927,43.39931
|
| 741 |
+
3.665614,43.39903
|
| 742 |
+
3.667674,43.398656
|
| 743 |
+
3.667159,43.398281
|
| 744 |
+
3.666108,43.398656
|
| 745 |
+
3.665915,43.398422
|
| 746 |
+
3.665507,43.398593
|
| 747 |
+
3.665539,43.398827
|
| 748 |
+
3.6654,43.398663
|
| 749 |
+
3.665045,43.398718
|
| 750 |
+
3.665067,43.398858
|
| 751 |
+
3.664541,43.399084
|
| 752 |
+
3.66438,43.399014
|
| 753 |
+
3.663576,43.399373
|
| 754 |
+
3.662996,43.398921
|
| 755 |
+
3.664058,43.398196
|
| 756 |
+
3.663522,43.397494
|
| 757 |
+
3.663661,43.397081
|
| 758 |
+
3.664091,43.397502
|
| 759 |
+
3.665164,43.397018
|
| 760 |
+
3.66555,43.397439
|
| 761 |
+
3.666859,43.3968
|
| 762 |
+
3.667095,43.396956
|
| 763 |
+
3.667266,43.396878
|
| 764 |
+
3.667824,43.397829
|
| 765 |
+
3.668168,43.397767
|
| 766 |
+
3.668039,43.39751
|
| 767 |
+
3.669133,43.397291
|
| 768 |
+
3.668693,43.39627
|
| 769 |
+
3.669452,43.396143
|
| 770 |
+
3.669586,43.39635
|
| 771 |
+
3.669291,43.396496
|
| 772 |
+
3.670034,43.397034
|
| 773 |
+
3.671542,43.396496
|
| 774 |
+
3.670962,43.395834
|
| 775 |
+
3.671097,43.395849
|
| 776 |
+
3.671311,43.396091
|
| 777 |
+
3.671515,43.395989
|
| 778 |
+
3.671644,43.396473
|
| 779 |
+
3.672695,43.396165
|
| 780 |
+
3.672953,43.396722
|
| 781 |
+
3.673983,43.39588
|
| 782 |
+
3.6738,43.395631
|
| 783 |
+
3.674326,43.39567
|
| 784 |
+
3.674262,43.395436
|
| 785 |
+
3.675807,43.395007
|
| 786 |
+
3.676085,43.395584
|
| 787 |
+
3.675731,43.395709
|
| 788 |
+
3.675989,43.396075
|
| 789 |
+
3.675753,43.396153
|
| 790 |
+
3.675989,43.396535
|
| 791 |
+
3.676579,43.396325
|
| 792 |
+
3.676922,43.397057
|
| 793 |
+
3.675731,43.397416
|
| 794 |
+
3.674326,43.397455
|
| 795 |
+
3.674819,43.398562
|
| 796 |
+
3.674304,43.399092
|
| 797 |
+
3.674058,43.399162
|
| 798 |
+
3.673586,43.398882
|
| 799 |
+
3.673478,43.399076
|
| 800 |
+
3.672706,43.399186
|
| 801 |
+
3.672652,43.398772
|
| 802 |
+
3.672051,43.398936
|
| 803 |
+
3.671751,43.399014
|
| 804 |
+
3.671182,43.398913
|
| 805 |
+
3.669423,43.398866
|
| 806 |
+
3.669391,43.399357
|
| 807 |
+
3.669423,43.399794
|
| 808 |
+
3.669584,43.399918
|
| 809 |
+
3.669637,43.400176
|
| 810 |
+
3.669895,43.400183
|
| 811 |
+
3.669863,43.400581
|
| 812 |
+
3.669605,43.400659
|
| 813 |
+
3.66967,43.400979
|
| 814 |
+
3.669723,43.401953
|
| 815 |
+
3.670871,43.402046
|
| 816 |
+
3.670785,43.402389
|
| 817 |
+
3.67203,43.402467
|
| 818 |
+
3.671987,43.403036
|
| 819 |
+
3.671172,43.40299
|
| 820 |
+
3.671225,43.403715
|
| 821 |
+
3.670571,43.403613
|
| 822 |
+
3.668125,43.401805</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 823 |
+
<Placemark id="xGVYD">
|
| 824 |
+
<name>Bureau 16</name><ExtendedData>
|
| 825 |
+
<Data name="_umap_options"><value>{"color":"Chartreuse"}</value></Data></ExtendedData>
|
| 826 |
+
<Polygon>
|
| 827 |
+
<outerBoundaryIs>
|
| 828 |
+
<LinearRing><coordinates>3.693091,43.406544
|
| 829 |
+
3.688059,43.406037
|
| 830 |
+
3.688617,43.405609
|
| 831 |
+
3.689175,43.405655
|
| 832 |
+
3.68926,43.405063
|
| 833 |
+
3.687952,43.404362
|
| 834 |
+
3.688316,43.403878
|
| 835 |
+
3.687458,43.403535
|
| 836 |
+
3.686299,43.403785
|
| 837 |
+
3.686128,43.404128
|
| 838 |
+
3.686299,43.404362
|
| 839 |
+
3.685634,43.404845
|
| 840 |
+
3.685913,43.405297
|
| 841 |
+
3.685441,43.405422
|
| 842 |
+
3.685098,43.405843
|
| 843 |
+
3.685784,43.406435
|
| 844 |
+
3.68778,43.406201
|
| 845 |
+
3.688016,43.406326
|
| 846 |
+
3.687705,43.406451
|
| 847 |
+
3.687812,43.40677
|
| 848 |
+
3.688606,43.406864
|
| 849 |
+
3.688692,43.406778
|
| 850 |
+
3.689089,43.406825
|
| 851 |
+
3.689142,43.406716
|
| 852 |
+
3.689325,43.406856
|
| 853 |
+
3.689314,43.406957
|
| 854 |
+
3.689893,43.406988
|
| 855 |
+
3.689904,43.406926
|
| 856 |
+
3.690033,43.406949
|
| 857 |
+
3.690076,43.406786
|
| 858 |
+
3.691031,43.406856
|
| 859 |
+
3.691245,43.407027
|
| 860 |
+
3.691245,43.407136
|
| 861 |
+
3.692694,43.407285
|
| 862 |
+
3.693112,43.407456
|
| 863 |
+
3.693144,43.407565
|
| 864 |
+
3.693262,43.407573
|
| 865 |
+
3.693466,43.408033
|
| 866 |
+
3.69426,43.408742
|
| 867 |
+
3.694646,43.40843
|
| 868 |
+
3.694904,43.408664
|
| 869 |
+
3.696041,43.406887
|
| 870 |
+
3.693091,43.406544</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 871 |
+
<Placemark id="gFNtY">
|
| 872 |
+
<name>Bureau 17</name><ExtendedData></ExtendedData>
|
| 873 |
+
<Polygon>
|
| 874 |
+
<outerBoundaryIs>
|
| 875 |
+
<LinearRing><coordinates>3.691058,43.407949
|
| 876 |
+
3.691127,43.407534
|
| 877 |
+
3.691015,43.407394
|
| 878 |
+
3.691224,43.40716
|
| 879 |
+
3.692683,43.407292
|
| 880 |
+
3.693112,43.407479
|
| 881 |
+
3.693112,43.407573
|
| 882 |
+
3.693241,43.407588
|
| 883 |
+
3.693434,43.408033
|
| 884 |
+
3.69426,43.408742
|
| 885 |
+
3.694646,43.408446
|
| 886 |
+
3.694893,43.408649
|
| 887 |
+
3.69441,43.409475
|
| 888 |
+
3.693627,43.410371
|
| 889 |
+
3.692554,43.409467
|
| 890 |
+
3.69175,43.409864
|
| 891 |
+
3.69117,43.409381
|
| 892 |
+
3.691084,43.409303
|
| 893 |
+
3.690988,43.409233
|
| 894 |
+
3.690923,43.409186
|
| 895 |
+
3.690966,43.408746
|
| 896 |
+
3.690816,43.408461
|
| 897 |
+
3.69091,43.408017
|
| 898 |
+
3.691058,43.407949</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 899 |
+
<Placemark id="8BP79">
|
| 900 |
+
<name>Bureau 18</name><ExtendedData></ExtendedData>
|
| 901 |
+
<Polygon>
|
| 902 |
+
<outerBoundaryIs>
|
| 903 |
+
<LinearRing><coordinates>3.670152,43.404026
|
| 904 |
+
3.670893,43.404089
|
| 905 |
+
3.670882,43.404245
|
| 906 |
+
3.671955,43.404323
|
| 907 |
+
3.671805,43.404954
|
| 908 |
+
3.672878,43.405281
|
| 909 |
+
3.672727,43.405624
|
| 910 |
+
3.672566,43.405882
|
| 911 |
+
3.672116,43.405819
|
| 912 |
+
3.671708,43.406661
|
| 913 |
+
3.670732,43.408095
|
| 914 |
+
3.66864,43.407386
|
| 915 |
+
3.668082,43.408235
|
| 916 |
+
3.667567,43.408539
|
| 917 |
+
3.667073,43.408204
|
| 918 |
+
3.665936,43.408906
|
| 919 |
+
3.665636,43.408641
|
| 920 |
+
3.663962,43.409693
|
| 921 |
+
3.66246,43.407807
|
| 922 |
+
3.664391,43.406677
|
| 923 |
+
3.664262,43.406037
|
| 924 |
+
3.664445,43.405928
|
| 925 |
+
3.66496,43.406248
|
| 926 |
+
3.665142,43.405967
|
| 927 |
+
3.664638,43.405648
|
| 928 |
+
3.664353,43.405894
|
| 929 |
+
3.664261,43.405819
|
| 930 |
+
3.664553,43.405594
|
| 931 |
+
3.664362,43.405496
|
| 932 |
+
3.664099,43.4057
|
| 933 |
+
3.663975,43.405595
|
| 934 |
+
3.664235,43.405398
|
| 935 |
+
3.663028,43.404151
|
| 936 |
+
3.66276,43.404026
|
| 937 |
+
3.662513,43.403933
|
| 938 |
+
3.662417,43.403629
|
| 939 |
+
3.663093,43.403528
|
| 940 |
+
3.663104,43.403177
|
| 941 |
+
3.663865,43.40313
|
| 942 |
+
3.663983,43.403294
|
| 943 |
+
3.667288,43.403489
|
| 944 |
+
3.670174,43.403668
|
| 945 |
+
3.670152,43.404026</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 946 |
+
<Placemark id="jva7z">
|
| 947 |
+
<name>Bureau 19</name><ExtendedData></ExtendedData>
|
| 948 |
+
<Polygon>
|
| 949 |
+
<outerBoundaryIs>
|
| 950 |
+
<LinearRing><coordinates>3.670721,43.408126
|
| 951 |
+
3.67173,43.406638
|
| 952 |
+
3.672116,43.405796
|
| 953 |
+
3.672577,43.405897
|
| 954 |
+
3.673317,43.406279
|
| 955 |
+
3.673156,43.406677
|
| 956 |
+
3.675624,43.407331
|
| 957 |
+
3.675742,43.406934
|
| 958 |
+
3.676043,43.406903
|
| 959 |
+
3.676,43.406606
|
| 960 |
+
3.676375,43.406536
|
| 961 |
+
3.676375,43.406201
|
| 962 |
+
3.677684,43.40663
|
| 963 |
+
3.678285,43.406606
|
| 964 |
+
3.67851,43.406778
|
| 965 |
+
3.678124,43.407479
|
| 966 |
+
3.676622,43.408493
|
| 967 |
+
3.676257,43.409974
|
| 968 |
+
3.670721,43.408126</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 969 |
+
<Placemark id="UJ4zH">
|
| 970 |
+
<name>Bureau 20</name><ExtendedData></ExtendedData>
|
| 971 |
+
<Polygon>
|
| 972 |
+
<outerBoundaryIs>
|
| 973 |
+
<LinearRing><coordinates>3.690891,43.409194
|
| 974 |
+
3.690956,43.409194
|
| 975 |
+
3.691771,43.409872
|
| 976 |
+
3.692554,43.409459
|
| 977 |
+
3.693627,43.410387
|
| 978 |
+
3.69293,43.410698
|
| 979 |
+
3.692265,43.411454
|
| 980 |
+
3.69175,43.411376
|
| 981 |
+
3.691063,43.41147
|
| 982 |
+
3.690677,43.411408
|
| 983 |
+
3.69029,43.411454
|
| 984 |
+
3.690012,43.411415
|
| 985 |
+
3.689904,43.411579
|
| 986 |
+
3.689507,43.411618
|
| 987 |
+
3.689443,43.411486
|
| 988 |
+
3.68926,43.411571
|
| 989 |
+
3.6891,43.4114
|
| 990 |
+
3.689228,43.411314
|
| 991 |
+
3.689239,43.411135
|
| 992 |
+
3.688992,43.41108
|
| 993 |
+
3.688756,43.41087
|
| 994 |
+
3.688853,43.410831
|
| 995 |
+
3.688767,43.41062
|
| 996 |
+
3.688434,43.410605
|
| 997 |
+
3.688091,43.410246
|
| 998 |
+
3.688252,43.41002
|
| 999 |
+
3.689089,43.40956
|
| 1000 |
+
3.689057,43.409272
|
| 1001 |
+
3.690548,43.408461
|
| 1002 |
+
3.69072,43.408563
|
| 1003 |
+
3.690891,43.409194</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1004 |
+
<Placemark id="r7MPB">
|
| 1005 |
+
<name>Bureau 21</name><ExtendedData></ExtendedData>
|
| 1006 |
+
<Polygon>
|
| 1007 |
+
<outerBoundaryIs>
|
| 1008 |
+
<LinearRing><coordinates>3.688402,43.410605
|
| 1009 |
+
3.688767,43.410613
|
| 1010 |
+
3.688874,43.410823
|
| 1011 |
+
3.688745,43.41087
|
| 1012 |
+
3.688982,43.411072
|
| 1013 |
+
3.68926,43.411111
|
| 1014 |
+
3.68925,43.411306
|
| 1015 |
+
3.6891,43.4114
|
| 1016 |
+
3.689239,43.411563
|
| 1017 |
+
3.68911,43.411673
|
| 1018 |
+
3.687887,43.411852
|
| 1019 |
+
3.687973,43.411665
|
| 1020 |
+
3.687737,43.411447
|
| 1021 |
+
3.687544,43.411415
|
| 1022 |
+
3.687222,43.411026
|
| 1023 |
+
3.687576,43.410815
|
| 1024 |
+
3.687383,43.410652
|
| 1025 |
+
3.687179,43.410706
|
| 1026 |
+
3.686568,43.410067
|
| 1027 |
+
3.687469,43.409646
|
| 1028 |
+
3.687136,43.409303
|
| 1029 |
+
3.686804,43.409436
|
| 1030 |
+
3.685892,43.408384
|
| 1031 |
+
3.685033,43.408797
|
| 1032 |
+
3.684744,43.408532
|
| 1033 |
+
3.684937,43.408313
|
| 1034 |
+
3.685838,43.407892
|
| 1035 |
+
3.685634,43.407651
|
| 1036 |
+
3.686514,43.407246
|
| 1037 |
+
3.686353,43.406996
|
| 1038 |
+
3.686535,43.406957
|
| 1039 |
+
3.68646,43.406716
|
| 1040 |
+
3.686954,43.406591
|
| 1041 |
+
3.686879,43.40638
|
| 1042 |
+
3.687758,43.406193
|
| 1043 |
+
3.688011,43.406337
|
| 1044 |
+
3.687721,43.406439
|
| 1045 |
+
3.687807,43.406786
|
| 1046 |
+
3.688622,43.406864
|
| 1047 |
+
3.688697,43.406782
|
| 1048 |
+
3.689121,43.406825
|
| 1049 |
+
3.689164,43.40672
|
| 1050 |
+
3.68933,43.40686
|
| 1051 |
+
3.689314,43.406965
|
| 1052 |
+
3.689904,43.407004
|
| 1053 |
+
3.689931,43.40695
|
| 1054 |
+
3.690049,43.406954
|
| 1055 |
+
3.690092,43.406802
|
| 1056 |
+
3.691031,43.406903
|
| 1057 |
+
3.691246,43.407137
|
| 1058 |
+
3.69102,43.407414
|
| 1059 |
+
3.691128,43.407554
|
| 1060 |
+
3.691074,43.407979
|
| 1061 |
+
3.690908,43.408037
|
| 1062 |
+
3.690827,43.408474
|
| 1063 |
+
3.690988,43.408758
|
| 1064 |
+
3.690924,43.40921
|
| 1065 |
+
3.69058,43.408509
|
| 1066 |
+
3.689046,43.409296
|
| 1067 |
+
3.6891,43.409576
|
| 1068 |
+
3.68822,43.41002
|
| 1069 |
+
3.688091,43.410254
|
| 1070 |
+
3.688402,43.410605</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1071 |
+
<Placemark id="8NzmY">
|
| 1072 |
+
<name>Bureau 22</name><ExtendedData></ExtendedData>
|
| 1073 |
+
<Polygon>
|
| 1074 |
+
<outerBoundaryIs>
|
| 1075 |
+
<LinearRing><coordinates>3.685741,43.406965
|
| 1076 |
+
3.681879,43.407744
|
| 1077 |
+
3.681686,43.407542
|
| 1078 |
+
3.681257,43.40762
|
| 1079 |
+
3.681107,43.407433
|
| 1080 |
+
3.680592,43.407464
|
| 1081 |
+
3.680613,43.407183
|
| 1082 |
+
3.679819,43.407183
|
| 1083 |
+
3.679132,43.406809
|
| 1084 |
+
3.678596,43.406762
|
| 1085 |
+
3.678339,43.406622
|
| 1086 |
+
3.677609,43.406638
|
| 1087 |
+
3.676364,43.406217
|
| 1088 |
+
3.676322,43.406497
|
| 1089 |
+
3.675978,43.406575
|
| 1090 |
+
3.676,43.406887
|
| 1091 |
+
3.675742,43.406903
|
| 1092 |
+
3.67557,43.407292
|
| 1093 |
+
3.67321,43.4067
|
| 1094 |
+
3.67336,43.406295
|
| 1095 |
+
3.672631,43.405874
|
| 1096 |
+
3.672867,43.405281
|
| 1097 |
+
3.671901,43.40497
|
| 1098 |
+
3.671944,43.404362
|
| 1099 |
+
3.670914,43.404268
|
| 1100 |
+
3.670893,43.404065
|
| 1101 |
+
3.67306,43.404362
|
| 1102 |
+
3.673146,43.40405
|
| 1103 |
+
3.672566,43.403987
|
| 1104 |
+
3.672631,43.403785
|
| 1105 |
+
3.673339,43.403925
|
| 1106 |
+
3.673189,43.404315
|
| 1107 |
+
3.674433,43.40433
|
| 1108 |
+
3.674476,43.403987
|
| 1109 |
+
3.674669,43.404315
|
| 1110 |
+
3.67836,43.404143
|
| 1111 |
+
3.678725,43.404377
|
| 1112 |
+
3.678972,43.404291
|
| 1113 |
+
3.679036,43.404393
|
| 1114 |
+
3.679283,43.404323
|
| 1115 |
+
3.67925,43.403956
|
| 1116 |
+
3.67954,43.40419
|
| 1117 |
+
3.680667,43.403941
|
| 1118 |
+
3.681611,43.403754
|
| 1119 |
+
3.682029,43.403582
|
| 1120 |
+
3.682523,43.403426
|
| 1121 |
+
3.682265,43.403036
|
| 1122 |
+
3.682415,43.40299
|
| 1123 |
+
3.682662,43.403379
|
| 1124 |
+
3.683628,43.402982
|
| 1125 |
+
3.683488,43.402514
|
| 1126 |
+
3.683703,43.402982
|
| 1127 |
+
3.683767,43.40306
|
| 1128 |
+
3.683639,43.403699
|
| 1129 |
+
3.684475,43.403574
|
| 1130 |
+
3.685623,43.40253
|
| 1131 |
+
3.685656,43.402584
|
| 1132 |
+
3.684551,43.403598
|
| 1133 |
+
3.685795,43.404026
|
| 1134 |
+
3.686053,43.403582
|
| 1135 |
+
3.686997,43.403208
|
| 1136 |
+
3.687683,43.402951
|
| 1137 |
+
3.688059,43.40299
|
| 1138 |
+
3.687555,43.40352
|
| 1139 |
+
3.686321,43.403754
|
| 1140 |
+
3.686106,43.404112
|
| 1141 |
+
3.686192,43.404346
|
| 1142 |
+
3.685613,43.40486
|
| 1143 |
+
3.685859,43.405258
|
| 1144 |
+
3.685377,43.405375
|
| 1145 |
+
3.685119,43.405889
|
| 1146 |
+
3.685741,43.406497
|
| 1147 |
+
3.686557,43.406404
|
| 1148 |
+
3.685741,43.406965</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1149 |
+
<Placemark id="7iQ5D">
|
| 1150 |
+
<name>Bureau 23</name><ExtendedData></ExtendedData>
|
| 1151 |
+
<Polygon>
|
| 1152 |
+
<outerBoundaryIs>
|
| 1153 |
+
<LinearRing><coordinates>3.682984,43.413442
|
| 1154 |
+
3.680667,43.413987
|
| 1155 |
+
3.679122,43.41426
|
| 1156 |
+
3.677469,43.414377
|
| 1157 |
+
3.676718,43.413831
|
| 1158 |
+
3.67704,43.413442
|
| 1159 |
+
3.67733,43.413582
|
| 1160 |
+
3.677588,43.413247
|
| 1161 |
+
3.677341,43.413099
|
| 1162 |
+
3.677802,43.412514
|
| 1163 |
+
3.677974,43.4126
|
| 1164 |
+
3.678832,43.411992
|
| 1165 |
+
3.679068,43.412109
|
| 1166 |
+
3.67924,43.411587
|
| 1167 |
+
3.678886,43.411462
|
| 1168 |
+
3.679014,43.411298
|
| 1169 |
+
3.67887,43.411248
|
| 1170 |
+
3.679004,43.411104
|
| 1171 |
+
3.679808,43.411088
|
| 1172 |
+
3.681117,43.411587
|
| 1173 |
+
3.681332,43.411369
|
| 1174 |
+
3.681332,43.411033
|
| 1175 |
+
3.681686,43.411174
|
| 1176 |
+
3.681718,43.411088
|
| 1177 |
+
3.681514,43.41101
|
| 1178 |
+
3.681718,43.410706
|
| 1179 |
+
3.681096,43.410457
|
| 1180 |
+
3.681332,43.410028
|
| 1181 |
+
3.682834,43.410036
|
| 1182 |
+
3.683102,43.410231
|
| 1183 |
+
3.683295,43.410332
|
| 1184 |
+
3.683982,43.410129
|
| 1185 |
+
3.683692,43.40988
|
| 1186 |
+
3.6841,43.409615
|
| 1187 |
+
3.684025,43.409576
|
| 1188 |
+
3.68439,43.409334
|
| 1189 |
+
3.685452,43.410449
|
| 1190 |
+
3.685269,43.41055
|
| 1191 |
+
3.685473,43.410792
|
| 1192 |
+
3.684443,43.411275
|
| 1193 |
+
3.684529,43.411727
|
| 1194 |
+
3.684787,43.411899
|
| 1195 |
+
3.684078,43.412148
|
| 1196 |
+
3.684422,43.413013
|
| 1197 |
+
3.682984,43.413442</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1198 |
+
<Placemark id="oQc1Y">
|
| 1199 |
+
<name>Bureau 24</name><ExtendedData>
|
| 1200 |
+
<Data name="_umap_options"><value>{"color":"Chartreuse"}</value></Data></ExtendedData>
|
| 1201 |
+
<Polygon>
|
| 1202 |
+
<outerBoundaryIs>
|
| 1203 |
+
<LinearRing><coordinates>3.664713,43.417081
|
| 1204 |
+
3.657546,43.412093
|
| 1205 |
+
3.659241,43.410784
|
| 1206 |
+
3.661923,43.410161
|
| 1207 |
+
3.662347,43.410523
|
| 1208 |
+
3.662771,43.411505
|
| 1209 |
+
3.661956,43.412027
|
| 1210 |
+
3.661301,43.411614
|
| 1211 |
+
3.660829,43.411969
|
| 1212 |
+
3.662642,43.413208
|
| 1213 |
+
3.662642,43.413489
|
| 1214 |
+
3.662975,43.413738
|
| 1215 |
+
3.663232,43.413668
|
| 1216 |
+
3.66364,43.413917
|
| 1217 |
+
3.663608,43.414151
|
| 1218 |
+
3.663844,43.414291
|
| 1219 |
+
3.664273,43.414369
|
| 1220 |
+
3.66555,43.415289
|
| 1221 |
+
3.665679,43.415164
|
| 1222 |
+
3.665979,43.415343
|
| 1223 |
+
3.665807,43.415484
|
| 1224 |
+
3.666322,43.415834
|
| 1225 |
+
3.664713,43.417081</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1226 |
+
<Placemark id="yDXRa">
|
| 1227 |
+
<name>Bureau 25</name><ExtendedData></ExtendedData>
|
| 1228 |
+
<Polygon>
|
| 1229 |
+
<outerBoundaryIs>
|
| 1230 |
+
<LinearRing><coordinates>3.66982,43.419637
|
| 1231 |
+
3.671257,43.420557
|
| 1232 |
+
3.674004,43.423518
|
| 1233 |
+
3.674026,43.423923
|
| 1234 |
+
3.675549,43.424422
|
| 1235 |
+
3.6763,43.423658
|
| 1236 |
+
3.677244,43.423658
|
| 1237 |
+
3.677888,43.421913
|
| 1238 |
+
3.677094,43.420962
|
| 1239 |
+
3.673983,43.418842
|
| 1240 |
+
3.673768,43.417175
|
| 1241 |
+
3.673961,43.416255
|
| 1242 |
+
3.674583,43.415647
|
| 1243 |
+
3.675302,43.415141
|
| 1244 |
+
3.676461,43.414696
|
| 1245 |
+
3.6791,43.41447
|
| 1246 |
+
3.681911,43.413925
|
| 1247 |
+
3.682501,43.413956
|
| 1248 |
+
3.682823,43.414057
|
| 1249 |
+
3.683242,43.413379
|
| 1250 |
+
3.680677,43.413995
|
| 1251 |
+
3.679068,43.414276
|
| 1252 |
+
3.677502,43.414361
|
| 1253 |
+
3.676611,43.41447
|
| 1254 |
+
3.675356,43.414455
|
| 1255 |
+
3.674852,43.414494
|
| 1256 |
+
3.67483,43.414572
|
| 1257 |
+
3.674465,43.414556
|
| 1258 |
+
3.674423,43.414665
|
| 1259 |
+
3.67395,43.414743
|
| 1260 |
+
3.673382,43.415468
|
| 1261 |
+
3.673736,43.415624
|
| 1262 |
+
3.673521,43.41592
|
| 1263 |
+
3.673221,43.415756
|
| 1264 |
+
3.673081,43.41592
|
| 1265 |
+
3.672695,43.415717
|
| 1266 |
+
3.672266,43.416232
|
| 1267 |
+
3.67292,43.416598
|
| 1268 |
+
3.672792,43.416801
|
| 1269 |
+
3.672588,43.416692
|
| 1270 |
+
3.671719,43.417728
|
| 1271 |
+
3.671955,43.417853
|
| 1272 |
+
3.671268,43.418648
|
| 1273 |
+
3.670903,43.418492
|
| 1274 |
+
3.67071,43.41871
|
| 1275 |
+
3.669788,43.418157
|
| 1276 |
+
3.669938,43.418001
|
| 1277 |
+
3.669809,43.417907
|
| 1278 |
+
3.669423,43.418281
|
| 1279 |
+
3.669863,43.418601
|
| 1280 |
+
3.670163,43.418796
|
| 1281 |
+
3.670067,43.418905
|
| 1282 |
+
3.670238,43.418967
|
| 1283 |
+
3.670152,43.419076
|
| 1284 |
+
3.669959,43.418991
|
| 1285 |
+
3.669884,43.419084
|
| 1286 |
+
3.66968,43.418975
|
| 1287 |
+
3.66938,43.4191
|
| 1288 |
+
3.669262,43.419045
|
| 1289 |
+
3.669273,43.418702
|
| 1290 |
+
3.668908,43.418788
|
| 1291 |
+
3.668575,43.419232
|
| 1292 |
+
3.668897,43.419559
|
| 1293 |
+
3.66982,43.419637</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1294 |
+
<Placemark id="roqV8">
|
| 1295 |
+
<name>Bureau 26</name><ExtendedData>
|
| 1296 |
+
<Data name="_umap_options"><value>{"color":"Crimson"}</value></Data></ExtendedData>
|
| 1297 |
+
<Polygon>
|
| 1298 |
+
<outerBoundaryIs>
|
| 1299 |
+
<LinearRing><coordinates>3.662868,43.410254
|
| 1300 |
+
3.665528,43.412171
|
| 1301 |
+
3.665496,43.412771
|
| 1302 |
+
3.667234,43.414057
|
| 1303 |
+
3.667331,43.415647
|
| 1304 |
+
3.666998,43.415749
|
| 1305 |
+
3.666215,43.415187
|
| 1306 |
+
3.666033,43.415297
|
| 1307 |
+
3.665775,43.415071
|
| 1308 |
+
3.666022,43.414891
|
| 1309 |
+
3.665282,43.414385
|
| 1310 |
+
3.665013,43.414548
|
| 1311 |
+
3.664702,43.414322
|
| 1312 |
+
3.664906,43.414167
|
| 1313 |
+
3.664713,43.413987
|
| 1314 |
+
3.664262,43.414346
|
| 1315 |
+
3.663844,43.414307
|
| 1316 |
+
3.663597,43.414167
|
| 1317 |
+
3.663629,43.413902
|
| 1318 |
+
3.663222,43.413652
|
| 1319 |
+
3.662964,43.41373
|
| 1320 |
+
3.662642,43.413504
|
| 1321 |
+
3.662642,43.413224
|
| 1322 |
+
3.663189,43.412865
|
| 1323 |
+
3.662503,43.412382
|
| 1324 |
+
3.662213,43.412577
|
| 1325 |
+
3.66172,43.412203
|
| 1326 |
+
3.662792,43.41147
|
| 1327 |
+
3.662353,43.410496
|
| 1328 |
+
3.662868,43.410254</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1329 |
+
<Placemark id="6YOqE">
|
| 1330 |
+
<name>Bureau 27</name><ExtendedData></ExtendedData>
|
| 1331 |
+
<Polygon>
|
| 1332 |
+
<outerBoundaryIs>
|
| 1333 |
+
<LinearRing><coordinates>3.674819,43.414556
|
| 1334 |
+
3.674476,43.414548
|
| 1335 |
+
3.67439,43.414681
|
| 1336 |
+
3.673961,43.414743
|
| 1337 |
+
3.67336,43.415468
|
| 1338 |
+
3.673725,43.415639
|
| 1339 |
+
3.673511,43.415889
|
| 1340 |
+
3.673221,43.415764
|
| 1341 |
+
3.673092,43.415904
|
| 1342 |
+
3.672663,43.415733
|
| 1343 |
+
3.672255,43.416208
|
| 1344 |
+
3.67292,43.416614
|
| 1345 |
+
3.672781,43.416801
|
| 1346 |
+
3.672609,43.416707
|
| 1347 |
+
3.67173,43.417728
|
| 1348 |
+
3.671933,43.417861
|
| 1349 |
+
3.671247,43.418632
|
| 1350 |
+
3.670914,43.418492
|
| 1351 |
+
3.6707,43.418694
|
| 1352 |
+
3.669755,43.418164
|
| 1353 |
+
3.669949,43.418009
|
| 1354 |
+
3.66982,43.417923
|
| 1355 |
+
3.669401,43.418274
|
| 1356 |
+
3.670152,43.418788
|
| 1357 |
+
3.670056,43.418913
|
| 1358 |
+
3.670228,43.418944
|
| 1359 |
+
3.670131,43.419053
|
| 1360 |
+
3.669873,43.419076
|
| 1361 |
+
3.669659,43.418967
|
| 1362 |
+
3.669348,43.419107
|
| 1363 |
+
3.669262,43.419045
|
| 1364 |
+
3.669219,43.41871
|
| 1365 |
+
3.668865,43.418803
|
| 1366 |
+
3.668565,43.419193
|
| 1367 |
+
3.667953,43.418788
|
| 1368 |
+
3.667803,43.41878
|
| 1369 |
+
3.667427,43.419022
|
| 1370 |
+
3.666569,43.418546
|
| 1371 |
+
3.666676,43.418507
|
| 1372 |
+
3.667438,43.41892
|
| 1373 |
+
3.667749,43.418702
|
| 1374 |
+
3.666655,43.418016
|
| 1375 |
+
3.666462,43.41818
|
| 1376 |
+
3.666258,43.418445
|
| 1377 |
+
3.666097,43.418991
|
| 1378 |
+
3.66599,43.418959
|
| 1379 |
+
3.666011,43.418616
|
| 1380 |
+
3.666451,43.417884
|
| 1381 |
+
3.666054,43.417198
|
| 1382 |
+
3.665839,43.417214
|
| 1383 |
+
3.665679,43.417011
|
| 1384 |
+
3.665968,43.416863
|
| 1385 |
+
3.665979,43.41652
|
| 1386 |
+
3.666869,43.415889
|
| 1387 |
+
3.667556,43.415562
|
| 1388 |
+
3.667449,43.414112
|
| 1389 |
+
3.666043,43.412725
|
| 1390 |
+
3.665646,43.411977
|
| 1391 |
+
3.664573,43.411166
|
| 1392 |
+
3.663576,43.410114
|
| 1393 |
+
3.663136,43.40967
|
| 1394 |
+
3.662653,43.409553
|
| 1395 |
+
3.661956,43.410067
|
| 1396 |
+
3.661784,43.410059
|
| 1397 |
+
3.662417,43.409459
|
| 1398 |
+
3.663211,43.408906
|
| 1399 |
+
3.663919,43.409747
|
| 1400 |
+
3.665314,43.408851
|
| 1401 |
+
3.665861,43.40928
|
| 1402 |
+
3.665872,43.409529
|
| 1403 |
+
3.666215,43.40981
|
| 1404 |
+
3.666569,43.409825
|
| 1405 |
+
3.667063,43.410184
|
| 1406 |
+
3.667213,43.410371
|
| 1407 |
+
3.666773,43.410667
|
| 1408 |
+
3.666869,43.411041
|
| 1409 |
+
3.667234,43.411096
|
| 1410 |
+
3.667449,43.410979
|
| 1411 |
+
3.667631,43.411096
|
| 1412 |
+
3.667846,43.410956
|
| 1413 |
+
3.668232,43.41126
|
| 1414 |
+
3.668586,43.411143
|
| 1415 |
+
3.669155,43.411797
|
| 1416 |
+
3.668371,43.412319
|
| 1417 |
+
3.668844,43.412538
|
| 1418 |
+
3.670346,43.411462
|
| 1419 |
+
3.670689,43.411696
|
| 1420 |
+
3.671504,43.411096
|
| 1421 |
+
3.672287,43.411938
|
| 1422 |
+
3.671676,43.412351
|
| 1423 |
+
3.67203,43.4126
|
| 1424 |
+
3.671547,43.41292
|
| 1425 |
+
3.672287,43.413551
|
| 1426 |
+
3.671547,43.414034
|
| 1427 |
+
3.673725,43.4144
|
| 1428 |
+
3.674873,43.414502
|
| 1429 |
+
3.674819,43.414556</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1430 |
+
<Placemark id="7aZin">
|
| 1431 |
+
<name>Bureau 28</name><ExtendedData></ExtendedData>
|
| 1432 |
+
<Polygon>
|
| 1433 |
+
<outerBoundaryIs>
|
| 1434 |
+
<LinearRing><coordinates>3.686471,43.406723
|
| 1435 |
+
3.686535,43.406942
|
| 1436 |
+
3.686374,43.407004
|
| 1437 |
+
3.686503,43.407238
|
| 1438 |
+
3.685634,43.407659
|
| 1439 |
+
3.685817,43.407877
|
| 1440 |
+
3.684947,43.408298
|
| 1441 |
+
3.684379,43.408937
|
| 1442 |
+
3.684658,43.408914
|
| 1443 |
+
3.684754,43.409069
|
| 1444 |
+
3.684626,43.409171
|
| 1445 |
+
3.684036,43.409576
|
| 1446 |
+
3.6841,43.409615
|
| 1447 |
+
3.683692,43.409888
|
| 1448 |
+
3.683968,43.410129
|
| 1449 |
+
3.683298,43.410332
|
| 1450 |
+
3.683099,43.410229
|
| 1451 |
+
3.682845,43.410028
|
| 1452 |
+
3.6813,43.41002
|
| 1453 |
+
3.681107,43.410449
|
| 1454 |
+
3.681707,43.410722
|
| 1455 |
+
3.681525,43.411014
|
| 1456 |
+
3.681718,43.411088
|
| 1457 |
+
3.681702,43.41117
|
| 1458 |
+
3.681327,43.411037
|
| 1459 |
+
3.681321,43.411388
|
| 1460 |
+
3.681123,43.411595
|
| 1461 |
+
3.679808,43.411082
|
| 1462 |
+
3.678998,43.411107
|
| 1463 |
+
3.678886,43.4108
|
| 1464 |
+
3.678435,43.410648
|
| 1465 |
+
3.678373,43.410515
|
| 1466 |
+
3.678854,43.40852
|
| 1467 |
+
3.679132,43.407791
|
| 1468 |
+
3.678145,43.407526
|
| 1469 |
+
3.678532,43.406778
|
| 1470 |
+
3.679111,43.40684
|
| 1471 |
+
3.679755,43.407168
|
| 1472 |
+
3.680592,43.407183
|
| 1473 |
+
3.680592,43.407479
|
| 1474 |
+
3.681085,43.407433
|
| 1475 |
+
3.681171,43.407666
|
| 1476 |
+
3.681686,43.407573
|
| 1477 |
+
3.681772,43.407776
|
| 1478 |
+
3.68572,43.406949
|
| 1479 |
+
3.686637,43.40638
|
| 1480 |
+
3.686868,43.406334
|
| 1481 |
+
3.686932,43.40656
|
| 1482 |
+
3.686471,43.406723</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1483 |
+
<Placemark id="sJOtO">
|
| 1484 |
+
<name>Bureau 29</name><ExtendedData></ExtendedData>
|
| 1485 |
+
<Polygon>
|
| 1486 |
+
<outerBoundaryIs>
|
| 1487 |
+
<LinearRing><coordinates>3.663919,43.40327
|
| 1488 |
+
3.663822,43.403122
|
| 1489 |
+
3.663125,43.403177
|
| 1490 |
+
3.663114,43.403528
|
| 1491 |
+
3.662417,43.403621
|
| 1492 |
+
3.662524,43.403933
|
| 1493 |
+
3.66305,43.404151
|
| 1494 |
+
3.664252,43.405406
|
| 1495 |
+
3.663973,43.405594
|
| 1496 |
+
3.664099,43.405701
|
| 1497 |
+
3.664364,43.405497
|
| 1498 |
+
3.664551,43.405593
|
| 1499 |
+
3.664262,43.405821
|
| 1500 |
+
3.664352,43.405895
|
| 1501 |
+
3.664627,43.405648
|
| 1502 |
+
3.665131,43.405975
|
| 1503 |
+
3.664927,43.406232
|
| 1504 |
+
3.664445,43.405927
|
| 1505 |
+
3.664262,43.406035
|
| 1506 |
+
3.664327,43.4067
|
| 1507 |
+
3.66246,43.407807
|
| 1508 |
+
3.663232,43.408898
|
| 1509 |
+
3.661752,43.409989
|
| 1510 |
+
3.656774,43.402787
|
| 1511 |
+
3.658136,43.401399
|
| 1512 |
+
3.658544,43.401173
|
| 1513 |
+
3.659145,43.40048
|
| 1514 |
+
3.659037,43.39995
|
| 1515 |
+
3.659649,43.398819
|
| 1516 |
+
3.660765,43.399092
|
| 1517 |
+
3.662041,43.398009
|
| 1518 |
+
3.662835,43.398866
|
| 1519 |
+
3.663565,43.399396
|
| 1520 |
+
3.664391,43.398999
|
| 1521 |
+
3.664541,43.399092
|
| 1522 |
+
3.665067,43.398858
|
| 1523 |
+
3.665024,43.398718
|
| 1524 |
+
3.66541,43.39864
|
| 1525 |
+
3.66555,43.398819
|
| 1526 |
+
3.665496,43.398601
|
| 1527 |
+
3.665915,43.398437
|
| 1528 |
+
3.666086,43.398663
|
| 1529 |
+
3.667202,43.398281
|
| 1530 |
+
3.667728,43.398663
|
| 1531 |
+
3.665593,43.39903
|
| 1532 |
+
3.664863,43.399326
|
| 1533 |
+
3.66438,43.399669
|
| 1534 |
+
3.66497,43.400332
|
| 1535 |
+
3.665646,43.400137
|
| 1536 |
+
3.666354,43.400425
|
| 1537 |
+
3.666934,43.400441
|
| 1538 |
+
3.666998,43.401033
|
| 1539 |
+
3.667363,43.401368
|
| 1540 |
+
3.667728,43.401407
|
| 1541 |
+
3.668028,43.401758
|
| 1542 |
+
3.669251,43.402725
|
| 1543 |
+
3.670592,43.403629
|
| 1544 |
+
3.669852,43.40366
|
| 1545 |
+
3.667331,43.403489
|
| 1546 |
+
3.663919,43.40327</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1547 |
+
<Placemark id="4Xq6M">
|
| 1548 |
+
<name>Bureau 30</name><ExtendedData></ExtendedData>
|
| 1549 |
+
<Polygon>
|
| 1550 |
+
<outerBoundaryIs>
|
| 1551 |
+
<LinearRing><coordinates>3.684443,43.413021
|
| 1552 |
+
3.684068,43.41214
|
| 1553 |
+
3.684829,43.411899
|
| 1554 |
+
3.68454,43.411735
|
| 1555 |
+
3.684454,43.411275
|
| 1556 |
+
3.685484,43.410792
|
| 1557 |
+
3.685259,43.41055
|
| 1558 |
+
3.685462,43.410449
|
| 1559 |
+
3.684422,43.409342
|
| 1560 |
+
3.684744,43.409062
|
| 1561 |
+
3.684669,43.408906
|
| 1562 |
+
3.684497,43.409038
|
| 1563 |
+
3.68439,43.408937
|
| 1564 |
+
3.684733,43.4085
|
| 1565 |
+
3.685033,43.408804
|
| 1566 |
+
3.685881,43.408376
|
| 1567 |
+
3.686535,43.409155
|
| 1568 |
+
3.686804,43.409451
|
| 1569 |
+
3.687136,43.409295
|
| 1570 |
+
3.687479,43.409646
|
| 1571 |
+
3.68661,43.410067
|
| 1572 |
+
3.687201,43.410737
|
| 1573 |
+
3.687394,43.410652
|
| 1574 |
+
3.687587,43.410839
|
| 1575 |
+
3.687211,43.411026
|
| 1576 |
+
3.687544,43.411408
|
| 1577 |
+
3.687748,43.411454
|
| 1578 |
+
3.687898,43.411556
|
| 1579 |
+
3.687994,43.411665
|
| 1580 |
+
3.687887,43.411844
|
| 1581 |
+
3.688155,43.411817
|
| 1582 |
+
3.68852,43.411762
|
| 1583 |
+
3.688901,43.4117
|
| 1584 |
+
3.689126,43.411661
|
| 1585 |
+
3.689448,43.411474
|
| 1586 |
+
3.689518,43.411618
|
| 1587 |
+
3.689904,43.411571
|
| 1588 |
+
3.690012,43.411404
|
| 1589 |
+
3.69029,43.41147
|
| 1590 |
+
3.690687,43.411384
|
| 1591 |
+
3.691117,43.411478
|
| 1592 |
+
3.691771,43.411369
|
| 1593 |
+
3.692232,43.41147
|
| 1594 |
+
3.693091,43.411743
|
| 1595 |
+
3.692704,43.412156
|
| 1596 |
+
3.692758,43.412865
|
| 1597 |
+
3.693048,43.413255
|
| 1598 |
+
3.696095,43.413987
|
| 1599 |
+
3.695741,43.414237
|
| 1600 |
+
3.694711,43.414065
|
| 1601 |
+
3.694614,43.414213
|
| 1602 |
+
3.694528,43.414213
|
| 1603 |
+
3.694571,43.414026
|
| 1604 |
+
3.693391,43.413839
|
| 1605 |
+
3.692833,43.414455
|
| 1606 |
+
3.690902,43.414572
|
| 1607 |
+
3.690269,43.414213
|
| 1608 |
+
3.689378,43.414167
|
| 1609 |
+
3.688681,43.414696
|
| 1610 |
+
3.688198,43.414595
|
| 1611 |
+
3.688005,43.414829
|
| 1612 |
+
3.687726,43.414743
|
| 1613 |
+
3.688027,43.414439
|
| 1614 |
+
3.687426,43.414198
|
| 1615 |
+
3.687072,43.41352
|
| 1616 |
+
3.686428,43.413231
|
| 1617 |
+
3.686213,43.413411
|
| 1618 |
+
3.686138,43.413372
|
| 1619 |
+
3.686235,43.413231
|
| 1620 |
+
3.685516,43.413052
|
| 1621 |
+
3.684754,43.413816
|
| 1622 |
+
3.684347,43.414057
|
| 1623 |
+
3.683639,43.414151
|
| 1624 |
+
3.682855,43.414042
|
| 1625 |
+
3.683231,43.413379
|
| 1626 |
+
3.684443,43.413021</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1627 |
+
<Placemark id="oaH2G">
|
| 1628 |
+
<name>Bureau 31</name><ExtendedData>
|
| 1629 |
+
<Data name="_umap_options"><value>{"color":"BlueViolet"}</value></Data></ExtendedData>
|
| 1630 |
+
<Polygon>
|
| 1631 |
+
<outerBoundaryIs>
|
| 1632 |
+
<LinearRing><coordinates>3.648233,43.397627
|
| 1633 |
+
3.649971,43.398546
|
| 1634 |
+
3.654606,43.400402
|
| 1635 |
+
3.656645,43.402023
|
| 1636 |
+
3.656752,43.40235
|
| 1637 |
+
3.654971,43.404143
|
| 1638 |
+
3.659145,43.400472
|
| 1639 |
+
3.659027,43.399942
|
| 1640 |
+
3.659638,43.398811
|
| 1641 |
+
3.660754,43.399069
|
| 1642 |
+
3.66202,43.398001
|
| 1643 |
+
3.661805,43.397829
|
| 1644 |
+
3.661602,43.397907
|
| 1645 |
+
3.660979,43.397829
|
| 1646 |
+
3.661333,43.397315
|
| 1647 |
+
3.660979,43.396816
|
| 1648 |
+
3.659091,43.395865
|
| 1649 |
+
3.659177,43.395787
|
| 1650 |
+
3.658254,43.395319
|
| 1651 |
+
3.658447,43.394836
|
| 1652 |
+
3.659155,43.395194
|
| 1653 |
+
3.660336,43.393931
|
| 1654 |
+
3.659477,43.393666
|
| 1655 |
+
3.659756,43.393354
|
| 1656 |
+
3.659155,43.39312
|
| 1657 |
+
3.659327,43.392637
|
| 1658 |
+
3.660979,43.393261
|
| 1659 |
+
3.662868,43.392544
|
| 1660 |
+
3.662782,43.392138
|
| 1661 |
+
3.661816,43.391795
|
| 1662 |
+
3.659906,43.391296
|
| 1663 |
+
3.659542,43.390205
|
| 1664 |
+
3.657911,43.38955
|
| 1665 |
+
3.655765,43.389643
|
| 1666 |
+
3.653941,43.389565
|
| 1667 |
+
3.65422,43.389175
|
| 1668 |
+
3.653812,43.389394
|
| 1669 |
+
3.648856,43.38707
|
| 1670 |
+
3.643341,43.392045
|
| 1671 |
+
3.648233,43.397627</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1672 |
+
<Placemark id="h0wD5">
|
| 1673 |
+
<name>Bureau 32</name><ExtendedData></ExtendedData>
|
| 1674 |
+
<Polygon>
|
| 1675 |
+
<outerBoundaryIs>
|
| 1676 |
+
<LinearRing><coordinates>3.671579,43.412951
|
| 1677 |
+
3.671987,43.412592
|
| 1678 |
+
3.671708,43.412358
|
| 1679 |
+
3.672309,43.411938
|
| 1680 |
+
3.671515,43.41108
|
| 1681 |
+
3.6707,43.411688
|
| 1682 |
+
3.670399,43.411439
|
| 1683 |
+
3.669444,43.412117
|
| 1684 |
+
3.668844,43.41253
|
| 1685 |
+
3.668329,43.412312
|
| 1686 |
+
3.669155,43.411797
|
| 1687 |
+
3.668575,43.411143
|
| 1688 |
+
3.668221,43.411228
|
| 1689 |
+
3.667835,43.410971
|
| 1690 |
+
3.66762,43.41108
|
| 1691 |
+
3.667427,43.410979
|
| 1692 |
+
3.667223,43.411088
|
| 1693 |
+
3.666848,43.411018
|
| 1694 |
+
3.666751,43.410659
|
| 1695 |
+
3.667191,43.410379
|
| 1696 |
+
3.667052,43.410184
|
| 1697 |
+
3.666537,43.40981
|
| 1698 |
+
3.666204,43.409802
|
| 1699 |
+
3.665882,43.409537
|
| 1700 |
+
3.665839,43.409264
|
| 1701 |
+
3.665335,43.408836
|
| 1702 |
+
3.665646,43.408641
|
| 1703 |
+
3.665957,43.408914
|
| 1704 |
+
3.667084,43.408204
|
| 1705 |
+
3.667556,43.408555
|
| 1706 |
+
3.668103,43.40822
|
| 1707 |
+
3.66865,43.407355
|
| 1708 |
+
3.672835,43.408875
|
| 1709 |
+
3.675238,43.409716
|
| 1710 |
+
3.677652,43.41027
|
| 1711 |
+
3.677566,43.410558
|
| 1712 |
+
3.677094,43.410566
|
| 1713 |
+
3.67615,43.411127
|
| 1714 |
+
3.67557,43.410768
|
| 1715 |
+
3.674841,43.41108
|
| 1716 |
+
3.675377,43.411751
|
| 1717 |
+
3.672287,43.413543
|
| 1718 |
+
3.671579,43.412951</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark>
|
| 1719 |
+
<Placemark id="ydMJC">
|
| 1720 |
+
<name>Bureau 33</name><ExtendedData></ExtendedData>
|
| 1721 |
+
<Polygon>
|
| 1722 |
+
<outerBoundaryIs>
|
| 1723 |
+
<LinearRing><coordinates>3.678371,43.410519
|
| 1724 |
+
3.678435,43.410652
|
| 1725 |
+
3.678875,43.410792
|
| 1726 |
+
3.679004,43.411119
|
| 1727 |
+
3.678864,43.411244
|
| 1728 |
+
3.679013,43.411298
|
| 1729 |
+
3.678887,43.411465
|
| 1730 |
+
3.679237,43.411591
|
| 1731 |
+
3.679047,43.412125
|
| 1732 |
+
3.678778,43.412008
|
| 1733 |
+
3.677942,43.412623
|
| 1734 |
+
3.677802,43.412545
|
| 1735 |
+
3.677309,43.413083
|
| 1736 |
+
3.677545,43.41327
|
| 1737 |
+
3.677298,43.41359
|
| 1738 |
+
3.67703,43.413481
|
| 1739 |
+
3.676718,43.413839
|
| 1740 |
+
3.677405,43.414361
|
| 1741 |
+
3.6766,43.41447
|
| 1742 |
+
3.675431,43.414447
|
| 1743 |
+
3.674852,43.414486
|
| 1744 |
+
3.673768,43.414408
|
| 1745 |
+
3.671569,43.41405
|
| 1746 |
+
3.672266,43.413582
|
| 1747 |
+
3.675452,43.411727
|
| 1748 |
+
3.67483,43.411104
|
| 1749 |
+
3.67557,43.410761
|
| 1750 |
+
3.67601,43.411057
|
| 1751 |
+
3.676139,43.411135
|
| 1752 |
+
3.677083,43.410589
|
| 1753 |
+
3.677545,43.410558
|
| 1754 |
+
3.677673,43.410301
|
| 1755 |
+
3.676279,43.409958
|
| 1756 |
+
3.676622,43.408532
|
| 1757 |
+
3.678049,43.407464
|
| 1758 |
+
3.678424,43.407643
|
| 1759 |
+
3.679143,43.407721
|
| 1760 |
+
3.678854,43.408508
|
| 1761 |
+
3.678575,43.409716
|
| 1762 |
+
3.678371,43.410519</coordinates></LinearRing></outerBoundaryIs></Polygon></Placemark></Document></kml>
|
data/interim/elections_long.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70fc51d5dd8303c51339a95f818198ba0cc5f26e2a3dc951eae664eb8953a54d
|
| 3 |
+
size 2216814
|
data/mapping_candidats_blocs.csv
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
code_candidature;nom_candidature;bloc_1;bloc_2;bloc_3
|
| 2 |
+
NC;Nuance non communiquee;centre;;
|
| 3 |
+
LDIV;Divers;centre;;
|
| 4 |
+
DIV;Divers;centre;;
|
| 5 |
+
LDVD;Divers droite;droite_modere;droite_dure
|
| 6 |
+
LDVG;Divers gauche;gauche_modere;
|
| 7 |
+
LUG;Union de la gauche;;gauche_modere
|
| 8 |
+
LUD;Union de la droite;droite;droite_modere
|
| 9 |
+
LFN;Front national;extreme_droite;;
|
| 10 |
+
LEXG;Extreme gauche;extreme_gauche;;
|
| 11 |
+
LSOC;Parti socialiste;gauche_modere;
|
| 12 |
+
LUMP;Union pour un mouvement populaire;droite_modere;
|
| 13 |
+
LNC;Nouveau centre;centre;;
|
| 14 |
+
LFG;Front de gauche;gauche_dure;
|
| 15 |
+
LVEC;Europe Ecologie Les Verts;gauche_modere;;
|
| 16 |
+
LUDI;Union des democrates et independants;centre;droite_modere
|
| 17 |
+
LDVC;Divers centre;centre;;
|
| 18 |
+
LCOM;Parti communiste;gauche_dure;
|
| 19 |
+
LRN;Rassemblement national;extreme_droite;;
|
| 20 |
+
LUC;Union du centre;centre;;
|
| 21 |
+
LPG;Parti de gauche;gauche_dure;
|
| 22 |
+
LMDM;Mouvement democrate;centre;;
|
| 23 |
+
LLR;Les republicains;droite_modere;
|
| 24 |
+
LEXD;Extreme droite;extreme_droite;;
|
| 25 |
+
LREM;La republique en marche;centre;droite_modere
|
| 26 |
+
LFI;La France insoumise;gauche_dure;;
|
| 27 |
+
LECO;Ecologistes;gauche_modere;;
|
| 28 |
+
LREG;Regionalistes;centre;;
|
| 29 |
+
LGJ;Gilets jaunes;;
|
| 30 |
+
LRDG;Radicaux de gauche;gauche_modere;centre
|
| 31 |
+
LDLF;Debout la France;droite_dure;
|
| 32 |
+
RN;Rassemblement national;extreme_droite;;
|
| 33 |
+
LR;Les republicains;droite_modere;centre
|
| 34 |
+
EELV;Europe Ecologie Les Verts;gauche_modere;;
|
| 35 |
+
PS;Parti socialiste;gauche_modere;;
|
| 36 |
+
UDI;Union des democrates et independants;centre;droite_modere
|
| 37 |
+
PRG;Parti radical de gauche;gauche_modere;centre
|
| 38 |
+
DVD;Divers droite;droite_modere;droite_dure
|
| 39 |
+
DVG;Divers gauche;gauche_modere;
|
| 40 |
+
EXD;Extreme droite;extreme_droite;;
|
| 41 |
+
EXG;Extreme gauche;extreme_gauche;;
|
| 42 |
+
FN;Front national;extreme_droite;;
|
| 43 |
+
DLF;Debout la France;droite_dure;
|
| 44 |
+
REM;La republique en marche;centre;droite_modere
|
| 45 |
+
ENS;Ensemble;centre;droite_modere
|
| 46 |
+
LENS;Ensemble;centre;droite_modere
|
| 47 |
+
REC;Reconquete;extreme_droite;;
|
| 48 |
+
LREC;Reconquete;extreme_droite;;
|
| 49 |
+
DSV;Divers souverainiste;droite_dure;
|
| 50 |
+
LDSV;Divers souverainiste;droite_dure;
|
| 51 |
+
LUGE;Union de la gauche elargie;gauche_modere;
|
| 52 |
+
COM;Parti communiste;gauche_dure;
|
| 53 |
+
SOC;Parti socialiste;gauche_modere;;
|
| 54 |
+
FI;La France insoumise;gauche_dure;;
|
| 55 |
+
ECO;Ecologistes;gauche_modere;;
|
| 56 |
+
DXG;Divers extreme gauche;extreme_gauche;;
|
| 57 |
+
NUP;Nupes;gauche_dure;gauche_modere
|
| 58 |
+
BC-COM;Binome communiste;gauche_dure;
|
| 59 |
+
BC-DVD;Binome divers droite;droite_modere;droite_dure
|
| 60 |
+
BC-ECO;Binome ecologiste;gauche_modere;;
|
| 61 |
+
BC-RN;Binome rassemblement national;extreme_droite;;
|
data/mappings/category_mapping.csv
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
code_candidature;nom_candidature;bloc_1;bloc_2;bloc_3
|
| 2 |
+
NC;Nuance non communiquee;centre;;
|
| 3 |
+
LDIV;Divers;centre;;
|
| 4 |
+
DIV;Divers;centre;;
|
| 5 |
+
LDVD;Divers droite;droite_modere;droite_dure
|
| 6 |
+
LDVG;Divers gauche;gauche_modere;gauche
|
| 7 |
+
LUG;Union de la gauche;gauche;gauche_modere
|
| 8 |
+
LUD;Union de la droite;droite;droite_modere
|
| 9 |
+
LFN;Front national;extreme_droite;;
|
| 10 |
+
LEXG;Extreme gauche;extreme_gauche;;
|
| 11 |
+
LSOC;Parti socialiste;gauche_modere;gauche
|
| 12 |
+
LUMP;Union pour un mouvement populaire;droite_modere;droite
|
| 13 |
+
LNC;Nouveau centre;centre;;
|
| 14 |
+
LFG;Front de gauche;gauche_dure;gauche
|
| 15 |
+
LVEC;Europe Ecologie Les Verts;gauche_modere;;
|
| 16 |
+
LUDI;Union des democrates et independants;centre;droite_modere
|
| 17 |
+
LDVC;Divers centre;centre;;
|
| 18 |
+
LCOM;Parti communiste;gauche_dure;gauche
|
| 19 |
+
LRN;Rassemblement national;extreme_droite;;
|
| 20 |
+
LUC;Union du centre;centre;;
|
| 21 |
+
LPG;Parti de gauche;gauche_dure;gauche
|
| 22 |
+
LMDM;Mouvement democrate;centre;;
|
| 23 |
+
LLR;Les republicains;droite_modere;droite
|
| 24 |
+
LEXD;Extreme droite;extreme_droite;;
|
| 25 |
+
LREM;La republique en marche;centre;droite_modere
|
| 26 |
+
LFI;La France insoumise;gauche_dure;;
|
| 27 |
+
LECO;Ecologistes;gauche_modere;;
|
| 28 |
+
LREG;Regionalistes;centre;;
|
| 29 |
+
LGJ;Gilets jaunes;gauche;droite
|
| 30 |
+
LRDG;Radicaux de gauche;gauche_modere;centre
|
| 31 |
+
LDLF;Debout la France;droite_dure;droite
|
| 32 |
+
RN;Rassemblement national;extreme_droite;;
|
| 33 |
+
LR;Les republicains;droite_modere;centre
|
| 34 |
+
EELV;Europe Ecologie Les Verts;gauche_modere;;
|
| 35 |
+
PS;Parti socialiste;gauche_modere;;
|
| 36 |
+
UDI;Union des democrates et independants;centre;droite_modere
|
| 37 |
+
PRG;Parti radical de gauche;gauche_modere;centre
|
| 38 |
+
DVD;Divers droite;droite_modere;droite_dure
|
| 39 |
+
DVG;Divers gauche;gauche_modere;gauche
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version: "3.9"
|
| 2 |
+
|
| 3 |
+
services:
|
| 4 |
+
postgres:
|
| 5 |
+
image: postgres:16
|
| 6 |
+
container_name: elections_postgres
|
| 7 |
+
restart: unless-stopped
|
| 8 |
+
env_file: .env
|
| 9 |
+
environment:
|
| 10 |
+
- POSTGRES_USER=${POSTGRES_USER}
|
| 11 |
+
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
|
| 12 |
+
- POSTGRES_DB=${POSTGRES_DB}
|
| 13 |
+
ports:
|
| 14 |
+
- "${POSTGRES_PORT:-5432}:5432"
|
| 15 |
+
volumes:
|
| 16 |
+
- pgdata:/var/lib/postgresql/data
|
| 17 |
+
|
| 18 |
+
pgadmin:
|
| 19 |
+
image: dpage/pgadmin4:8
|
| 20 |
+
container_name: elections_pgadmin
|
| 21 |
+
restart: unless-stopped
|
| 22 |
+
depends_on:
|
| 23 |
+
- postgres
|
| 24 |
+
env_file: .env
|
| 25 |
+
environment:
|
| 26 |
+
PGADMIN_DEFAULT_EMAIL: admin@sete.fr
|
| 27 |
+
PGADMIN_DEFAULT_PASSWORD: admin
|
| 28 |
+
PGADMIN_LISTEN_PORT: 8080
|
| 29 |
+
ports:
|
| 30 |
+
- "8080:8080"
|
| 31 |
+
volumes:
|
| 32 |
+
- pgadmin_data:/var/lib/pgadmin
|
| 33 |
+
profiles:
|
| 34 |
+
- admin
|
| 35 |
+
|
| 36 |
+
volumes:
|
| 37 |
+
pgdata:
|
| 38 |
+
pgadmin_data:
|
harmoniser.md
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
% Vot/Ins -> % Votants
|
| 2 |
+
Code du d°partement -> Code du département
|
| 3 |
+
|
| 4 |
+
Exprim°s -> Exprim°s 1 -> Exprimés
|
| 5 |
+
Libell° Abr°g° Liste 1, Libellé abrégé de liste 1 -> Libellé Abrégé Liste 1
|
| 6 |
+
|
| 7 |
+
Libell° Etendu Liste 1, Liste, Libellé de liste 1, Liste.1 -> Libellé Etendu Liste 1
|
| 8 |
+
|
| 9 |
+
Libell° de la circonscription, Libellé de la circonscription
|
| 10 |
+
|
| 11 |
+
Libell° de la commune, Libellé commune -> Libellé de la commune
|
| 12 |
+
|
| 13 |
+
Libell° du d°partement, Libellé département -> Libellé du département
|
| 14 |
+
|
| 15 |
+
Nom candidat 1, Nom Tête de Liste 1, Nom T°te de Liste 1, Nom.1 -> Nom 1
|
| 16 |
+
|
| 17 |
+
Pr°nom du candidat 1, Pr°nom du candidat t°te de liste, Pr°nom.1 -> Prénom 1
|
| 18 |
+
|
| 19 |
+
N°Panneau 1, N.Pan. 1 -> N°Panneau 1
|
main.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import subprocess
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
PROJECT_ROOT = Path(__file__).resolve().parent
|
| 10 |
+
PYTHON = sys.executable
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def run_step(cmd: list[str], desc: str) -> None:
|
| 14 |
+
print(f"\n=== {desc} ===")
|
| 15 |
+
result = subprocess.run(cmd, check=False)
|
| 16 |
+
if result.returncode != 0:
|
| 17 |
+
raise SystemExit(f"Echec de l'étape '{desc}' (code {result.returncode}). Commande: {' '.join(cmd)}")
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def main() -> None:
|
| 21 |
+
parser = argparse.ArgumentParser(
|
| 22 |
+
description="Pipeline orchestration: preprocess -> features -> train -> predict",
|
| 23 |
+
)
|
| 24 |
+
parser.add_argument("--raw-dir", type=Path, default=Path("data/raw"), help="Répertoire des fichiers bruts.")
|
| 25 |
+
parser.add_argument("--mapping", type=Path, default=Path("config/nuances.yaml"), help="Mapping nuances->catégories.")
|
| 26 |
+
parser.add_argument("--target-election", type=str, default="municipales", help="Election cible (ex: municipales).")
|
| 27 |
+
parser.add_argument("--target-year", type=int, default=2026, help="Année cible.")
|
| 28 |
+
parser.add_argument("--commune-code", type=str, default="301", help="Code commune pour la prédiction (Sète=301).")
|
| 29 |
+
parser.add_argument("--skip-preprocess", action="store_true", help="Ne pas relancer le prétraitement.")
|
| 30 |
+
parser.add_argument("--skip-features", action="store_true", help="Ne pas reconstruire le panel.")
|
| 31 |
+
parser.add_argument("--skip-train", action="store_true", help="Ne pas réentraîner le modèle.")
|
| 32 |
+
parser.add_argument("--skip-predict", action="store_true", help="Ne pas générer les prédictions CSV.")
|
| 33 |
+
args = parser.parse_args()
|
| 34 |
+
|
| 35 |
+
interim_path = PROJECT_ROOT / "data" / "interim" / "elections_long.parquet"
|
| 36 |
+
panel_path = PROJECT_ROOT / "data" / "processed" / "panel.parquet"
|
| 37 |
+
model_path = PROJECT_ROOT / "models" / "hist_gradient_boosting.joblib"
|
| 38 |
+
|
| 39 |
+
if not args.skip_preprocess:
|
| 40 |
+
run_step(
|
| 41 |
+
[
|
| 42 |
+
PYTHON,
|
| 43 |
+
"-m",
|
| 44 |
+
"src.data.preprocess",
|
| 45 |
+
"--raw-dir",
|
| 46 |
+
str(args.raw_dir),
|
| 47 |
+
"--output-dir",
|
| 48 |
+
str(PROJECT_ROOT / "data" / "interim"),
|
| 49 |
+
],
|
| 50 |
+
"Prétraitement (format long)",
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
if not args.skip_features:
|
| 54 |
+
run_step(
|
| 55 |
+
[
|
| 56 |
+
PYTHON,
|
| 57 |
+
"-m",
|
| 58 |
+
"src.features.build_features",
|
| 59 |
+
"--elections-long",
|
| 60 |
+
str(interim_path),
|
| 61 |
+
"--mapping",
|
| 62 |
+
str(args.mapping),
|
| 63 |
+
"--output",
|
| 64 |
+
str(panel_path),
|
| 65 |
+
"--output-csv",
|
| 66 |
+
str(PROJECT_ROOT / "data" / "processed" / "panel.csv"),
|
| 67 |
+
],
|
| 68 |
+
"Construction du panel features+cibles",
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
if not args.skip_train:
|
| 72 |
+
run_step(
|
| 73 |
+
[
|
| 74 |
+
PYTHON,
|
| 75 |
+
"-m",
|
| 76 |
+
"src.model.train",
|
| 77 |
+
"--panel",
|
| 78 |
+
str(panel_path),
|
| 79 |
+
"--reports-dir",
|
| 80 |
+
str(PROJECT_ROOT / "reports"),
|
| 81 |
+
"--models-dir",
|
| 82 |
+
str(PROJECT_ROOT / "models"),
|
| 83 |
+
],
|
| 84 |
+
"Entraînement / évaluation des modèles",
|
| 85 |
+
)
|
| 86 |
+
|
| 87 |
+
if not args.skip_predict:
|
| 88 |
+
run_step(
|
| 89 |
+
[
|
| 90 |
+
PYTHON,
|
| 91 |
+
"-m",
|
| 92 |
+
"src.model.predict",
|
| 93 |
+
"--model-path",
|
| 94 |
+
str(model_path),
|
| 95 |
+
"--feature-columns",
|
| 96 |
+
str(PROJECT_ROOT / "models" / "feature_columns.json"),
|
| 97 |
+
"--elections-long",
|
| 98 |
+
str(interim_path),
|
| 99 |
+
"--mapping",
|
| 100 |
+
str(args.mapping),
|
| 101 |
+
"--target-election-type",
|
| 102 |
+
args.target_election,
|
| 103 |
+
"--target-year",
|
| 104 |
+
str(args.target_year),
|
| 105 |
+
"--commune-code",
|
| 106 |
+
args.commune_code,
|
| 107 |
+
"--output-dir",
|
| 108 |
+
str(PROJECT_ROOT / "predictions"),
|
| 109 |
+
],
|
| 110 |
+
"Génération des prédictions CSV",
|
| 111 |
+
)
|
| 112 |
+
|
| 113 |
+
print("\nPipeline terminé. Lance Gradio avec `python -m app.gradio_app`.")
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
if __name__ == "__main__":
|
| 117 |
+
main()
|
mission.md
ADDED
|
@@ -0,0 +1,410 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Mission
|
| 2 |
+
|
| 3 |
+
## Étape 1
|
| 4 |
+
|
| 5 |
+
Nous créons un pipeline qui consiste à prendre en entrée des dataframes au format csv et qui les intègre dans une base de données.
|
| 6 |
+
|
| 7 |
+
La base de données comprend toujours la liste des bureaux de vote de toute la France et tout nouveau dataframe ajouterait des colonnes.
|
| 8 |
+
|
| 9 |
+
Dans un premier temps, on s'assure que le fichier soit importé et normalisé pour être conforme à la base de données, afin que la fusion puisse se dérouler correctement.
|
| 10 |
+
|
| 11 |
+
Dans un second temps le dataset est fusionné.
|
| 12 |
+
|
| 13 |
+
## Ancien
|
| 14 |
+
|
| 15 |
+
Tu es OpenAI Codex dans VS Code. Tu travailles dans un repo Python existant contenant des notebooks et des données dans data/raw, data/interim, data/processed. Objectif métier : au cabinet du maire de Sète, construire un outil prédictif des prochaines municipales (ex: 2026) bureau de vote par bureau de vote, basé sur l’historique électoral et une comparaison au national, puis exposer le tout via une application Gradio. Le projet doit rester opérant à long terme pour les échéances futures (pas “codé en dur” uniquement pour 2026).
|
| 16 |
+
|
| 17 |
+
Contexte fonctionnel (à respecter strictement)
|
| 18 |
+
|
| 19 |
+
Commune principale : Sète (outil centré sur Sète). Prévoir configuration pour étendre à d’autres communes ultérieurement (sans casser l’architecture).
|
| 20 |
+
|
| 21 |
+
L’utilisateur de Gradio choisit :
|
| 22 |
+
|
| 23 |
+
un bureau de vote
|
| 24 |
+
|
| 25 |
+
une élection cible à observer (par défaut : municipales 2026, mais l’UI et le backend doivent accepter n’importe quel couple (type, année) présent / futur)
|
| 26 |
+
|
| 27 |
+
Gradio renvoie :
|
| 28 |
+
|
| 29 |
+
le score prédit (%) pour chaque catégorie de candidats
|
| 30 |
+
|
| 31 |
+
entre parenthèses à côté de chaque score, la différence (en points) vs :
|
| 32 |
+
|
| 33 |
+
la dernière élection législative avant l’élection cible (dans le contexte “municipales 2026”, c’est typiquement les législatives les plus récentes avant 2026)
|
| 34 |
+
|
| 35 |
+
les municipales 2020
|
| 36 |
+
|
| 37 |
+
Catégories à utiliser (cibles et affichage) :
|
| 38 |
+
|
| 39 |
+
centre
|
| 40 |
+
|
| 41 |
+
gauche_modere
|
| 42 |
+
|
| 43 |
+
droite_modere
|
| 44 |
+
|
| 45 |
+
gauche_dure
|
| 46 |
+
|
| 47 |
+
droite_dure
|
| 48 |
+
|
| 49 |
+
extreme_gauche
|
| 50 |
+
|
| 51 |
+
extreme_droite
|
| 52 |
+
|
| 53 |
+
Données & notebooks existants
|
| 54 |
+
|
| 55 |
+
Les fichiers 01_pretraitement et 02_feature_engineering existent (notebooks dans notebooks/) et ont déjà fait un premier nettoyage / feature engineering.
|
| 56 |
+
|
| 57 |
+
Étape 1 : vérifier que ces notebooks sont cohérents avec l’objectif final (prédire municipales 2026 + long terme + bureau par bureau + comparaisons national/local), puis industrialiser : extraire la logique dans des modules Python versionnés sous src/.
|
| 58 |
+
|
| 59 |
+
Les datasets bruts sont dans data/raw. data/interim et data/processed sont disponibles et doivent être utilisés si pertinents (ne pas refaire inutilement ce qui existe déjà, mais corriger si c’est incohérent).
|
| 60 |
+
|
| 61 |
+
Exigences méthodologiques non négociables
|
| 62 |
+
1) Anti-fuite temporelle (time leakage)
|
| 63 |
+
|
| 64 |
+
Pour prédire une élection cible (type, année = T), les features doivent être calculées uniquement avec des données strictement antérieures à T.
|
| 65 |
+
|
| 66 |
+
Interdiction d’utiliser des résultats de l’élection cible dans les features.
|
| 67 |
+
|
| 68 |
+
Les “écarts au national” doivent être calculés uniquement pour des élections antérieures, avec le score national correspondant à ces élections antérieures.
|
| 69 |
+
|
| 70 |
+
La validation doit respecter la causalité (split temporel).
|
| 71 |
+
|
| 72 |
+
2) Structure des données adaptée (panel)
|
| 73 |
+
|
| 74 |
+
Ne pas rester sur “1 ligne = 1 bureau” wide naïf si cela empêche l’apprentissage.
|
| 75 |
+
Implémenter un dataset panel conceptuellement : 1 ligne = (bureau, election_type, election_year) avec :
|
| 76 |
+
|
| 77 |
+
cibles : parts de voix (%) par catégorie
|
| 78 |
+
|
| 79 |
+
features : historiques laggés, écarts national antérieurs, participation antérieure, etc.
|
| 80 |
+
|
| 81 |
+
3) Contraintes de sortie
|
| 82 |
+
|
| 83 |
+
Les prédictions sont des % par catégorie :
|
| 84 |
+
|
| 85 |
+
clip à [0, 100]
|
| 86 |
+
|
| 87 |
+
renormaliser pour sommer à 100 (gérer somme=0)
|
| 88 |
+
Alternative bonus : modéliser via log-ratios + softmax, mais renormalisation simple acceptable.
|
| 89 |
+
|
| 90 |
+
Étape 1 — Audit & industrialisation des notebooks
|
| 91 |
+
|
| 92 |
+
Lire et analyser notebooks/01_pretraitement.* et notebooks/02_feature_engineering.*.
|
| 93 |
+
|
| 94 |
+
Produire un diagnostic succinct (dans reports/notebook_audit.md) :
|
| 95 |
+
|
| 96 |
+
quelles tables/colonnes sont produites ?
|
| 97 |
+
|
| 98 |
+
est-ce compatible avec “bureau×élection” ?
|
| 99 |
+
|
| 100 |
+
existe-t-il des risques de leakage ?
|
| 101 |
+
|
| 102 |
+
est-ce centré sur Sète ou multi-communes ?
|
| 103 |
+
|
| 104 |
+
Refactorer en code production :
|
| 105 |
+
|
| 106 |
+
src/data/preprocess.py : chargement, nettoyage, normalisation des identifiants (commune, bureau), harmonisation des colonnes, gestion des tours (si présents).
|
| 107 |
+
|
| 108 |
+
src/features/build_features.py : construction des features “safe” et panel dataset.
|
| 109 |
+
|
| 110 |
+
Scripts CLI : python -m src.data.preprocess ..., python -m src.features.build_features ...
|
| 111 |
+
|
| 112 |
+
Générer (ou régénérer si nécessaire) un dataset final standard :
|
| 113 |
+
|
| 114 |
+
data/processed/panel.parquet
|
| 115 |
+
|
| 116 |
+
et un dictionnaire de données data/processed/data_dictionary.md
|
| 117 |
+
|
| 118 |
+
Étape 2 — Base PostgreSQL pour l’historique (utilisée par Gradio)
|
| 119 |
+
|
| 120 |
+
Construire une base PostgreSQL (docker-compose recommandé) qui stocke l’historique complet et permet de requêter rapidement par bureau.
|
| 121 |
+
|
| 122 |
+
2.1 Livrables techniques DB
|
| 123 |
+
|
| 124 |
+
docker-compose.yml lançant Postgres + un outil admin optionnel (pgAdmin facultatif).
|
| 125 |
+
|
| 126 |
+
.env.example pour config DB (host, port, user, password, dbname).
|
| 127 |
+
|
| 128 |
+
Schéma SQL (via Alembic OU SQLAlchemy create_all) versionné dans src/db/.
|
| 129 |
+
|
| 130 |
+
2.2 Modèle de données (proposition minimale à implémenter)
|
| 131 |
+
|
| 132 |
+
Tables conseillées (adapter si nécessaire, mais rester normalisé) :
|
| 133 |
+
|
| 134 |
+
communes : id, name_normalized, insee_code (si dispo)
|
| 135 |
+
|
| 136 |
+
bureaux : id, commune_id, bureau_code, bureau_label (si dispo), UNIQUE(commune_id, bureau_code)
|
| 137 |
+
|
| 138 |
+
elections : id, election_type, election_year, round (nullable), date (nullable), UNIQUE(type, year, round)
|
| 139 |
+
|
| 140 |
+
categories : id, name (les 7 catégories)
|
| 141 |
+
|
| 142 |
+
results_local : id, bureau_id, election_id, category_id, share_pct, votes (nullable), expressed (nullable), turnout_pct (nullable)
|
| 143 |
+
|
| 144 |
+
results_national : id, election_id, category_id, share_pct, votes (nullable), expressed (nullable), turnout_pct (nullable)
|
| 145 |
+
|
| 146 |
+
2.3 Ingestion / ETL vers Postgres
|
| 147 |
+
|
| 148 |
+
Créer src/db/ingest.py :
|
| 149 |
+
|
| 150 |
+
lit les données depuis data/processed (préféré) sinon reconstruit depuis data/raw via preprocess + features.
|
| 151 |
+
|
| 152 |
+
insère/upsère idempotent :
|
| 153 |
+
|
| 154 |
+
communes, bureaux, elections, categories
|
| 155 |
+
|
| 156 |
+
résultats locaux et nationaux
|
| 157 |
+
|
| 158 |
+
logs clairs + contrôles de cohérence (ex: somme des parts ≈ 100, votes ≤ exprimés, etc.)
|
| 159 |
+
|
| 160 |
+
script CLI : python -m src.db.ingest --input data/processed/panel.parquet
|
| 161 |
+
|
| 162 |
+
Étape 3 — Modélisation & prédiction
|
| 163 |
+
|
| 164 |
+
Construire un entraînement robuste + stockage des artefacts + prédiction par bureau.
|
| 165 |
+
|
| 166 |
+
3.1 Cibles
|
| 167 |
+
|
| 168 |
+
Multi-sorties : target_share_<categorie> pour les 7 catégories.
|
| 169 |
+
|
| 170 |
+
3.2 Features attendues (au minimum)
|
| 171 |
+
|
| 172 |
+
Pour une ligne (bureau, type, year=T) :
|
| 173 |
+
|
| 174 |
+
historiques laggés par catégorie (antérieurs à T)
|
| 175 |
+
|
| 176 |
+
prev_share_<cat>_any_lag1
|
| 177 |
+
|
| 178 |
+
prev_share_<cat>_<type>_lag1 (si existant)
|
| 179 |
+
|
| 180 |
+
écarts au national sur historiques :
|
| 181 |
+
|
| 182 |
+
prev_dev_to_national_<cat>_any_lag1 = prev_share_bureau - prev_share_national (sur l’élection antérieure utilisée)
|
| 183 |
+
|
| 184 |
+
ou par type si disponible
|
| 185 |
+
|
| 186 |
+
participation / abstention historiques si dispos :
|
| 187 |
+
|
| 188 |
+
prev_turnout_any_lag1, etc.
|
| 189 |
+
|
| 190 |
+
variables “swing” :
|
| 191 |
+
|
| 192 |
+
swing_<cat> = prev_share_lag1 - prev_share_lag2 (si lag2 existe)
|
| 193 |
+
|
| 194 |
+
Toutes ces features doivent être calculées sans fuite (join-asof temporel ou logique équivalente).
|
| 195 |
+
|
| 196 |
+
3.3 Split & évaluation (obligatoire)
|
| 197 |
+
|
| 198 |
+
Interdiction de random split.
|
| 199 |
+
|
| 200 |
+
Implémenter une évaluation temporelle paramétrable, ex :
|
| 201 |
+
|
| 202 |
+
train <= 2017, valid 2019–2021, test >= 2022 (exemple : configurable)
|
| 203 |
+
|
| 204 |
+
Métriques :
|
| 205 |
+
|
| 206 |
+
MAE moyenne sur les 7 catégories
|
| 207 |
+
|
| 208 |
+
MAE par catégorie
|
| 209 |
+
|
| 210 |
+
option : erreur sur “catégorie gagnante”
|
| 211 |
+
|
| 212 |
+
Générer :
|
| 213 |
+
|
| 214 |
+
reports/metrics.json
|
| 215 |
+
|
| 216 |
+
reports/metrics.md
|
| 217 |
+
|
| 218 |
+
quelques figures (matplotlib) dans reports/figures/
|
| 219 |
+
|
| 220 |
+
3.4 Modèles à entraîner
|
| 221 |
+
|
| 222 |
+
Implémenter au moins :
|
| 223 |
+
|
| 224 |
+
Ridge (baseline interprétable) avec standardisation
|
| 225 |
+
|
| 226 |
+
HistGradientBoostingRegressor (via MultiOutputRegressor si nécessaire)
|
| 227 |
+
|
| 228 |
+
LightGBM / XGBoost / CatBoost si installés (détection automatique, sinon skip proprement)
|
| 229 |
+
|
| 230 |
+
Sauvegarder modèles et preprocessors dans models/ (joblib), avec un model_card.md (date, données, split, features, métriques).
|
| 231 |
+
|
| 232 |
+
3.5 Prédiction pour une élection cible
|
| 233 |
+
|
| 234 |
+
Créer src/model/predict.py :
|
| 235 |
+
|
| 236 |
+
arguments : --target-election-type, --target-year, --commune (par défaut Sète)
|
| 237 |
+
|
| 238 |
+
produit un CSV :
|
| 239 |
+
|
| 240 |
+
predictions/pred_<type>_<year>_sete.csv
|
| 241 |
+
|
| 242 |
+
colonnes : commune, bureau_code, predicted_share_ (7), + comparateurs (voir ci-dessous)
|
| 243 |
+
|
| 244 |
+
Comparateurs à afficher dans Gradio
|
| 245 |
+
|
| 246 |
+
Pour chaque catégorie, calculer 2 deltas (points de %):
|
| 247 |
+
|
| 248 |
+
vs la dernière législative avant l’élection cible
|
| 249 |
+
|
| 250 |
+
trouver dans la DB l’élection election_type='legislatives' avec année max < target_year (et même round logique si géré)
|
| 251 |
+
|
| 252 |
+
récupérer le share_pct du bureau sur cette législative (par catégorie)
|
| 253 |
+
|
| 254 |
+
delta_leg = predicted_share - share_leg
|
| 255 |
+
|
| 256 |
+
vs les municipales 2020
|
| 257 |
+
|
| 258 |
+
si target_year != 2020 : récupérer election_type='municipales' et election_year=2020 pour ce bureau
|
| 259 |
+
|
| 260 |
+
delta_mun2020 = predicted_share - share_mun2020
|
| 261 |
+
Si une référence manque (bureau absent, données manquantes), afficher “N/A” au lieu du delta.
|
| 262 |
+
|
| 263 |
+
Étape 4 — Application Gradio
|
| 264 |
+
|
| 265 |
+
Créer une app Gradio production-ready dans app/gradio_app.py.
|
| 266 |
+
|
| 267 |
+
4.1 UI
|
| 268 |
+
|
| 269 |
+
Titre : “Prévision Municipales — Ville de Sète”
|
| 270 |
+
|
| 271 |
+
Inputs :
|
| 272 |
+
|
| 273 |
+
Dropdown bureau : liste des bureaux disponibles pour Sète (requête DB)
|
| 274 |
+
|
| 275 |
+
Dropdown election : couples (type, année) cibles (par défaut municipale 2026, mais liste configurable). Si 2026 n’existe pas en DB, elle doit pouvoir être sélectionnée quand même comme “cible future”.
|
| 276 |
+
|
| 277 |
+
Bouton : “Prédire”
|
| 278 |
+
|
| 279 |
+
4.2 Sorties
|
| 280 |
+
|
| 281 |
+
Afficher :
|
| 282 |
+
|
| 283 |
+
Un tableau (pandas dataframe ou composant gradio) avec 7 lignes (catégories) :
|
| 284 |
+
|
| 285 |
+
categorie
|
| 286 |
+
|
| 287 |
+
score_predit_%
|
| 288 |
+
|
| 289 |
+
Δ vs législatives (dernières) (en points)
|
| 290 |
+
|
| 291 |
+
Δ vs municipales 2020 (en points)
|
| 292 |
+
|
| 293 |
+
Option bonus : un bar chart matplotlib des scores prédits par catégorie (simple, lisible).
|
| 294 |
+
|
| 295 |
+
Format texte exigé (si rendu texte au lieu de tableau) :
|
| 296 |
+
|
| 297 |
+
centre : 21.3% (+1.2 vs législatives, -0.8 vs mun 2020)
|
| 298 |
+
|
| 299 |
+
et ainsi de suite
|
| 300 |
+
Avec N/A si delta indisponible.
|
| 301 |
+
|
| 302 |
+
4.3 Backend
|
| 303 |
+
|
| 304 |
+
L’app ne doit pas recalculer tout le dataset à chaque clic.
|
| 305 |
+
|
| 306 |
+
Au démarrage :
|
| 307 |
+
|
| 308 |
+
se connecte à Postgres
|
| 309 |
+
|
| 310 |
+
charge le modèle entraîné + preprocessor
|
| 311 |
+
|
| 312 |
+
Lors d’une prédiction :
|
| 313 |
+
|
| 314 |
+
récupère les features “safe” du bureau pour la cible (type, année) :
|
| 315 |
+
|
| 316 |
+
soit via une table features pré-calculées,
|
| 317 |
+
|
| 318 |
+
soit en construisant “à la volée” depuis l’historique DB (mais de manière efficace et sans fuite)
|
| 319 |
+
|
| 320 |
+
applique modèle → prédictions → post-traitement (clip + renormalisation)
|
| 321 |
+
|
| 322 |
+
calcule deltas vs références (législatives max<target_year, municipales 2020)
|
| 323 |
+
|
| 324 |
+
renvoie la table + graph
|
| 325 |
+
|
| 326 |
+
Architecture attendue du repo
|
| 327 |
+
|
| 328 |
+
Créer / compléter l’arborescence :
|
| 329 |
+
|
| 330 |
+
src/
|
| 331 |
+
|
| 332 |
+
data/
|
| 333 |
+
|
| 334 |
+
features/
|
| 335 |
+
|
| 336 |
+
db/
|
| 337 |
+
|
| 338 |
+
model/
|
| 339 |
+
|
| 340 |
+
utils/
|
| 341 |
+
|
| 342 |
+
app/
|
| 343 |
+
|
| 344 |
+
gradio_app.py
|
| 345 |
+
|
| 346 |
+
data/raw/ (existant)
|
| 347 |
+
|
| 348 |
+
data/interim/ (existant)
|
| 349 |
+
|
| 350 |
+
data/processed/ (existant)
|
| 351 |
+
|
| 352 |
+
models/
|
| 353 |
+
|
| 354 |
+
predictions/
|
| 355 |
+
|
| 356 |
+
reports/
|
| 357 |
+
|
| 358 |
+
notebooks/ (existant)
|
| 359 |
+
|
| 360 |
+
Inclure :
|
| 361 |
+
|
| 362 |
+
README.md très clair avec commandes :
|
| 363 |
+
|
| 364 |
+
(a) preprocess/build_features
|
| 365 |
+
|
| 366 |
+
(b) lancer Postgres
|
| 367 |
+
|
| 368 |
+
(c) ingest DB
|
| 369 |
+
|
| 370 |
+
(d) train/evaluate
|
| 371 |
+
|
| 372 |
+
(e) lancer Gradio
|
| 373 |
+
|
| 374 |
+
requirements.txt ou pyproject.toml
|
| 375 |
+
|
| 376 |
+
logs (INFO) + messages d’erreur actionnables (ex : DB down, modèle absent, fichiers manquants)
|
| 377 |
+
|
| 378 |
+
code robuste si data/raw vide : doit expliquer quoi mettre et comment nommer.
|
| 379 |
+
|
| 380 |
+
Points d’attention “réels”
|
| 381 |
+
|
| 382 |
+
gérer bureaux absents certaines années → imputation + deltas N/A
|
| 383 |
+
|
| 384 |
+
gérer harmonisation des libellés bureau → normalisation + warning
|
| 385 |
+
|
| 386 |
+
gérer tours (T1/T2) : inclure colonne round ou config, et éviter mélange non intentionnel
|
| 387 |
+
|
| 388 |
+
le mapping “candidat/nuance -> catégorie” est critique :
|
| 389 |
+
|
| 390 |
+
prévoir data/mappings/category_mapping.csv (ou YAML) et documenter la logique
|
| 391 |
+
|
| 392 |
+
tout non-mappé -> autres puis redistribuer/ignorer selon règle explicite (mais comme les catégories sont imposées, définir une stratégie : soit exclure “autres” du modèle, soit le répartir, soit le conserver et renormaliser sur 7 catégories — choisir une approche et la documenter)
|
| 393 |
+
|
| 394 |
+
Livrables finaux attendus
|
| 395 |
+
|
| 396 |
+
Code complet (modules + scripts CLI)
|
| 397 |
+
|
| 398 |
+
Schéma DB + docker-compose + script ingestion
|
| 399 |
+
|
| 400 |
+
Pipeline entraînement/évaluation + artefacts modèles
|
| 401 |
+
|
| 402 |
+
Application Gradio fonctionnelle
|
| 403 |
+
|
| 404 |
+
Exemples de fichiers mapping :
|
| 405 |
+
|
| 406 |
+
data/mappings/category_mapping.csv
|
| 407 |
+
|
| 408 |
+
Documentation complète dans README
|
| 409 |
+
|
| 410 |
+
Ne pas inventer de données. Travailler avec l’existant (data/interim, data/processed, notebooks), corriger si incohérent, et rendre l’ensemble production-ready (reproductible, configurable, sans fuite temporelle).
|
models/best_model.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "hist_gradient_boosting"
|
| 3 |
+
}
|
models/feature_columns.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
"prev_share_any_lag1_centre",
|
| 3 |
+
"prev_share_any_lag1_droite_dure",
|
| 4 |
+
"prev_share_any_lag1_droite_modere",
|
| 5 |
+
"prev_share_any_lag1_extreme_droite",
|
| 6 |
+
"prev_share_any_lag1_extreme_gauche",
|
| 7 |
+
"prev_share_any_lag1_gauche_dure",
|
| 8 |
+
"prev_share_any_lag1_gauche_modere",
|
| 9 |
+
"prev_share_type_lag1_centre",
|
| 10 |
+
"prev_share_type_lag1_droite_dure",
|
| 11 |
+
"prev_share_type_lag1_droite_modere",
|
| 12 |
+
"prev_share_type_lag1_extreme_droite",
|
| 13 |
+
"prev_share_type_lag1_extreme_gauche",
|
| 14 |
+
"prev_share_type_lag1_gauche_dure",
|
| 15 |
+
"prev_share_type_lag1_gauche_modere",
|
| 16 |
+
"prev_dev_to_national_any_lag1_centre",
|
| 17 |
+
"prev_dev_to_national_any_lag1_droite_dure",
|
| 18 |
+
"prev_dev_to_national_any_lag1_droite_modere",
|
| 19 |
+
"prev_dev_to_national_any_lag1_extreme_droite",
|
| 20 |
+
"prev_dev_to_national_any_lag1_extreme_gauche",
|
| 21 |
+
"prev_dev_to_national_any_lag1_gauche_dure",
|
| 22 |
+
"prev_dev_to_national_any_lag1_gauche_modere",
|
| 23 |
+
"prev_dev_to_national_type_lag1_centre",
|
| 24 |
+
"prev_dev_to_national_type_lag1_droite_dure",
|
| 25 |
+
"prev_dev_to_national_type_lag1_droite_modere",
|
| 26 |
+
"prev_dev_to_national_type_lag1_extreme_droite",
|
| 27 |
+
"prev_dev_to_national_type_lag1_extreme_gauche",
|
| 28 |
+
"prev_dev_to_national_type_lag1_gauche_dure",
|
| 29 |
+
"prev_dev_to_national_type_lag1_gauche_modere",
|
| 30 |
+
"swing_any_centre",
|
| 31 |
+
"swing_any_droite_dure",
|
| 32 |
+
"swing_any_droite_modere",
|
| 33 |
+
"swing_any_extreme_droite",
|
| 34 |
+
"swing_any_extreme_gauche",
|
| 35 |
+
"swing_any_gauche_dure",
|
| 36 |
+
"swing_any_gauche_modere",
|
| 37 |
+
"turnout_pct",
|
| 38 |
+
"prev_turnout_any_lag1",
|
| 39 |
+
"prev_turnout_same_type_lag1"
|
| 40 |
+
]
|
models/hist_gradient_boosting.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91189f0a1fa5876b60b75e54293f093023d12f1f32ee5e3076aa648659bf7afd
|
| 3 |
+
size 2676501
|
models/model_card.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Model card
|
| 2 |
+
- Modèle: hist_gradient_boosting
|
| 3 |
+
- Split temporel : train <= 2019, valid <= 2021, test >= 2022
|
| 4 |
+
- Features: 38 colonnes numériques (lags, écarts national, swing, turnout)
|
| 5 |
+
- Cibles: parts par bloc (7 catégories) renormalisées.
|
| 6 |
+
- Métriques principales (MAE moyen, jeux valid/test):
|
| 7 |
+
- Valid: 0.1233
|
| 8 |
+
- Test: 0.1146
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas>=2.2.0
|
| 2 |
+
numpy>=1.26.0
|
| 3 |
+
sqlalchemy>=2.0.0
|
| 4 |
+
psycopg2-binary>=2.9.9
|
| 5 |
+
gradio>=4.0.0
|
| 6 |
+
pyarrow>=15.0.0
|
| 7 |
+
scikit-learn>=1.4.0
|
| 8 |
+
# Modèles gradient boosting / multi-output recommandés pour la prédiction bureau de vote
|
| 9 |
+
lightgbm>=4.3.0
|
| 10 |
+
xgboost>=2.0.0
|
| 11 |
+
catboost>=1.2.5
|
| 12 |
+
shap>=0.45.0
|
| 13 |
+
pyyaml>=6.0.0
|
| 14 |
+
matplotlib>=3.8.0
|
| 15 |
+
folium>=0.16.0
|
src/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Makes src a package so notebooks can import src.data_prep
|
src/constants.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations

# The seven political blocs exposed as prediction targets in the app.
# Order matters: downstream code relies on this exact sequence when
# building target columns (target_share_<categorie>).
CANDIDATE_CATEGORIES = [
    "centre",
    "gauche_modere",
    "droite_modere",
    "gauche_dure",
    "droite_dure",
    "extreme_gauche",
    "extreme_droite",
]

# Numeric measure columns shared by the preprocessing pipeline and the
# database ingestion step.  Grouped by theme for readability; the list
# content and order are part of the pipeline contract and must not change.
NUMERIC_COLUMNS = [
    # Raw vote counts per bureau
    "voix_bloc",
    "exprimes",
    "inscrits",
    "votants",
    "blancs",
    "nuls",
    # Shares and national comparison rates
    "part_bloc",
    "part_bloc_national",
    "taux_participation_national",
    "taux_participation_bv",
    "taux_blancs_bv",
    "taux_nuls_bv",
    # Deviations vs national and registration growth
    "ecart_bloc_vs_national",
    "ecart_participation_vs_nat",
    "croissance_inscrits_depuis_base",
    # Lagged historical features (anti-leakage: computed on prior elections)
    "part_bloc_lag1",
    "ecart_bloc_vs_national_lag1",
    "taux_participation_bv_lag1",
    # Centered election year used as a trend regressor
    "annee_centre",
]
|
src/data/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Data subpackage: preprocessing helpers and CLI entrypoints.
|
| 3 |
+
"""
|
src/data/preprocess.py
ADDED
|
@@ -0,0 +1,481 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
import logging
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Any, Dict, Iterable, Mapping
|
| 8 |
+
|
| 9 |
+
import pandas as pd
|
| 10 |
+
|
| 11 |
+
from src import data_prep
|
| 12 |
+
|
| 13 |
+
LOGGER = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# Per-raw-file scrutin metadata used when no external meta-config file is given.
# Each entry describes one raw CSV: the scrutin type and date, the tour (either
# a fixed value via "tour" or a source column via "tour_column"), the raw
# columns combined into the polling-station id ("code_bv_cols"), and a
# "rename_map" from raw headers to the standard long-format schema.
# Several raw columns may map to the same target (e.g. Nom/Prénom both to
# nom_candidature); duplicates are merged downstream by deduplicate_columns.
# Fixed: the 14_EU.csv rename_map previously listed "Exprimés" twice — the
# second literal silently overwrote the first and was dead code.
DEFAULT_META_CONFIG: Dict[str, Dict[str, Any]] = {
    "14_EU.csv": {
        "type_scrutin": "europeennes",
        "date_scrutin": "2014-05-25",
        "tour_column": "N° tour",
        "code_bv_cols": ["Code de la commune", "N° de bureau de vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Votants": "votants",
            "Exprimés": "exprimes",
            "Nombre de voix du candidat": "voix",
            "Voix": "voix",
            "Nom du candidat": "nom_candidature",
            "Prénom du candidat": "nom_candidature",
            "Code nuance du candidat": "code_candidature",
        },
    },
    "14_MN14_T1T2.csv": {
        "type_scrutin": "municipales",
        "date_scrutin": "2014-03-23",
        "tour_column": "N° tour",
        "code_bv_cols": ["Code commune", "N° de bureau de vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Votants": "votants",
            "Exprimés": "exprimes",
            "Nombre de voix": "voix",
            "Nom du candidat tête de liste": "nom_candidature",
            "Prénom du candidat tête de liste": "nom_candidature",
            "Code nuance de la liste": "code_candidature",
        },
    },
    "17_L_T1.csv": {
        "type_scrutin": "legislatives",
        "date_scrutin": "2017-06-11",
        "tour": 1,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nuance": "code_candidature",
            "Nom": "nom_candidature",
        },
    },
    "17_L_T2.csv": {
        "type_scrutin": "legislatives",
        "date_scrutin": "2017-06-18",
        "tour": 2,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nuance": "code_candidature",
            "Nom": "nom_candidature",
        },
    },
    "17_PR_T1.csv": {
        "type_scrutin": "presidentielles",
        "date_scrutin": "2017-04-23",
        "tour": 1,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nom": "nom_candidature",
            "Code nuance du candidat": "code_candidature",
        },
    },
    "17_PR_T2.csv": {
        "type_scrutin": "presidentielles",
        "date_scrutin": "2017-05-07",
        "tour": 2,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nom": "nom_candidature",
            "Code nuance du candidat": "code_candidature",
        },
    },
    "19_EU.csv": {
        "type_scrutin": "europeennes",
        "date_scrutin": "2019-05-26",
        "tour": 1,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nom Tête de Liste": "nom_candidature",
            "Nuance Liste": "code_candidature",
        },
    },
    "20_MN_T1.csv": {
        "type_scrutin": "municipales",
        "date_scrutin": "2020-03-15",
        "tour": 1,
        "sep": ";",
        "code_bv_cols": ["Code de la commune", "Code B.Vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nom": "nom_candidature",
            "Liste": "nom_candidature",
            "Code Nuance": "code_candidature",
        },
    },
    "20_MN_T2.csv": {
        "type_scrutin": "municipales",
        "date_scrutin": "2020-06-28",
        "tour": 2,
        "code_bv_cols": ["Code de la commune", "Code B.Vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nom": "nom_candidature",
            "Liste": "nom_candidature",
            "Code Nuance": "code_candidature",
        },
    },
    "21_DEP_T1.csv": {
        "type_scrutin": "departementales",
        "date_scrutin": "2021-06-20",
        "tour": 1,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nuance": "code_candidature",
            "Binôme": "nom_candidature",
        },
    },
    "21_DEP_T2.csv": {
        "type_scrutin": "departementales",
        "date_scrutin": "2021-06-27",
        "tour": 2,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nuance": "code_candidature",
            "Binôme": "nom_candidature",
        },
    },
    "21_REG_T1.csv": {
        "type_scrutin": "regionales",
        "date_scrutin": "2021-06-20",
        "tour": 1,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nuance Liste": "code_candidature",
            "Libellé Abrégé Liste": "nom_candidature",
        },
    },
    "21_REG_T2.csv": {
        "type_scrutin": "regionales",
        "date_scrutin": "2021-06-27",
        "tour": 2,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nuance Liste": "code_candidature",
            "Libellé Abrégé Liste": "nom_candidature",
        },
    },
    "22_L_T1.csv": {
        "type_scrutin": "legislatives",
        "date_scrutin": "2022-06-12",
        "tour": 1,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nuance": "code_candidature",
            "Nom": "nom_candidature",
        },
    },
    "22_L_T2.csv": {
        "type_scrutin": "legislatives",
        "date_scrutin": "2022-06-19",
        "tour": 2,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nuance": "code_candidature",
            "Nom": "nom_candidature",
        },
    },
    "22_PR_T1.csv": {
        "type_scrutin": "presidentielles",
        "date_scrutin": "2022-04-10",
        "tour": 1,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nom": "nom_candidature",
            "Code nuance du candidat": "code_candidature",
        },
    },
    "22_PR_T2.csv": {
        "type_scrutin": "presidentielles",
        "date_scrutin": "2022-04-24",
        "tour": 2,
        "code_bv_cols": ["Code de la commune", "Code du b.vote"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nom": "nom_candidature",
            "Code nuance du candidat": "code_candidature",
        },
    },
    "24_EU.csv": {
        "type_scrutin": "europeennes",
        "date_scrutin": "2024-06-09",
        "tour": 1,
        "code_bv_cols": ["Code commune", "Code BV"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix 1": "voix",
            "Voix": "voix",
            "Nuance liste 1": "code_candidature",
            "Libellé abrégé de liste 1": "nom_candidature",
        },
    },
    "24_L_T1.csv": {
        "type_scrutin": "legislatives",
        "date_scrutin": "2024-06-30",
        "tour": 1,
        "code_bv_cols": ["Code commune", "Code BV"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nuance Liste": "code_candidature",
            "Libellé Abrégé Liste": "nom_candidature",
            "Binôme": "nom_candidature",
        },
    },
    "24_L_T2.csv": {
        "type_scrutin": "legislatives",
        "date_scrutin": "2024-07-07",
        "tour": 2,
        "code_bv_cols": ["Code commune", "Code BV"],
        "rename_map": {
            "Inscrits": "inscrits",
            "Abstentions": "abstentions",
            "Votants": "votants",
            "Blancs": "blancs",
            "Nuls": "nuls",
            "Exprimés": "exprimes",
            "Voix": "voix",
            "Nuance Liste": "code_candidature",
            "Libellé Abrégé Liste": "nom_candidature",
            "Binôme": "nom_candidature",
        },
    },
}
|
| 362 |
+
|
| 363 |
+
# Repository-level default meta-config: <repo_root>/config/raw_sources.yaml
# (this module lives two directories below the repository root).
DEFAULT_META_CONFIG_PATH = Path(__file__).resolve().parents[2].joinpath("config", "raw_sources.yaml")
|
| 364 |
+
|
| 365 |
+
|
| 366 |
+
def _resolve_meta_config(raw: Mapping[str, Mapping[str, Any]]) -> Dict[str, Dict[str, Any]]:
|
| 367 |
+
resolved: Dict[str, Dict[str, Any]] = {}
|
| 368 |
+
|
| 369 |
+
def resolve_one(key: str, stack: list[str]) -> Dict[str, Any]:
|
| 370 |
+
if key in resolved:
|
| 371 |
+
return resolved[key]
|
| 372 |
+
if key in stack:
|
| 373 |
+
raise ValueError(f"Cycle detecte dans meta-config: {' -> '.join(stack + [key])}")
|
| 374 |
+
meta = dict(raw[key])
|
| 375 |
+
base_key = meta.pop("copy_from", None)
|
| 376 |
+
if base_key:
|
| 377 |
+
if base_key not in raw:
|
| 378 |
+
raise KeyError(f"copy_from cible introuvable: {base_key}")
|
| 379 |
+
base = resolve_one(base_key, stack + [key])
|
| 380 |
+
merged = dict(base)
|
| 381 |
+
rename_base = dict(base.get("rename_map", {}))
|
| 382 |
+
rename_override = dict(meta.get("rename_map", {}))
|
| 383 |
+
merged.update(meta)
|
| 384 |
+
if rename_base or rename_override:
|
| 385 |
+
merged["rename_map"] = {**rename_base, **rename_override}
|
| 386 |
+
resolved[key] = merged
|
| 387 |
+
else:
|
| 388 |
+
resolved[key] = meta
|
| 389 |
+
return resolved[key]
|
| 390 |
+
|
| 391 |
+
for name in raw:
|
| 392 |
+
resolve_one(name, [])
|
| 393 |
+
return resolved
|
| 394 |
+
|
| 395 |
+
|
| 396 |
+
def load_meta_config(meta_path: Path | None) -> Dict[str, Dict[str, Any]]:
    """
    Load and resolve the scrutin meta-config.

    When ``meta_path`` is None, falls back to the repository YAML file if it
    exists, otherwise to the embedded ``DEFAULT_META_CONFIG``. YAML files
    require PyYAML (RuntimeError otherwise); all other files are parsed as
    JSON. The result is passed through :func:`_resolve_meta_config` to expand
    ``copy_from`` inheritance.

    Raises FileNotFoundError when an explicit path does not exist, and
    ValueError when the file does not contain a mapping.

    Fixed: files are now read with an explicit UTF-8 encoding; the previous
    locale-default read broke accented headers (e.g. "Exprimés") on platforms
    whose default encoding is not UTF-8 (Windows cp1252).
    """
    if meta_path is None:
        if DEFAULT_META_CONFIG_PATH.exists():
            meta_path = DEFAULT_META_CONFIG_PATH
        else:
            return DEFAULT_META_CONFIG
    if not meta_path.exists():
        raise FileNotFoundError(f"Meta-config file not found: {meta_path}")
    if meta_path.suffix in {".yml", ".yaml"}:
        try:
            import yaml
        except Exception as exc:
            raise RuntimeError("PyYAML is required to read YAML meta-config files.") from exc
        raw = yaml.safe_load(meta_path.read_text(encoding="utf-8")) or {}
    else:
        raw = json.loads(meta_path.read_text(encoding="utf-8"))
    if not isinstance(raw, dict):
        raise ValueError("Meta-config invalide: attendu un mapping de fichiers vers meta-donnees.")
    return _resolve_meta_config(raw)
|
| 415 |
+
|
| 416 |
+
|
| 417 |
+
def preprocess_all(raw_dir: Path, output_dir: Path, meta_config: Mapping[str, Mapping[str, Any]]) -> pd.DataFrame:
    """
    Standardise every configured raw CSV and persist the combined long format.

    Iterates over ``meta_config`` (raw file name -> scrutin metadata), skipping
    files absent from ``raw_dir``, concatenates the standardised frames,
    derives the ``annee`` and ``code_commune`` columns, logs consistency
    issues, and writes ``elections_long.parquet`` / ``elections_long.csv``
    into ``output_dir``.

    Returns the combined long-format DataFrame. Raises RuntimeError when none
    of the configured files could be loaded.
    """
    frames = []
    missing: list[str] = []
    for file_name, meta in meta_config.items():
        path = raw_dir / file_name
        if not path.exists():
            # Missing raw files are tolerated; reported once at the end.
            missing.append(file_name)
            continue
        LOGGER.info("Standardisation de %s", file_name)
        # Per-file overrides (sep/encoding/decimal) come from the meta entry;
        # defaults match the French open-data exports (";" + cp1252 family).
        df_std = data_prep.standardize_election(
            path,
            meta,
            rename_map=meta.get("rename_map", {}),
            sep=meta.get("sep", ";"),
            encoding=meta.get("encoding", ("cp1252", "utf-8-sig", "latin-1")),
            decimal=meta.get("decimal", ","),
        )  # type: ignore[arg-type]
        frames.append(df_std)
    if missing:
        LOGGER.warning("Fichiers manquants ignorés: %s", ", ".join(sorted(missing)))
    if not frames:
        raise RuntimeError("Aucune donnée chargée : vérifier le dossier raw et la configuration meta.")

    elections_long = pd.concat(frames, ignore_index=True)
    elections_long["date_scrutin"] = pd.to_datetime(elections_long["date_scrutin"])
    elections_long["annee"] = elections_long["date_scrutin"].dt.year
    elections_long["type_scrutin"] = elections_long["type_scrutin"].str.lower()
    # code_bv is built as "<commune>-<bureau>" (see data_prep.build_code_bv),
    # so the commune code is the first "-"-separated segment.
    elections_long["code_commune"] = elections_long["code_bv"].astype(str).str.split("-").str[0]

    # Consistency report is advisory: issues are logged, never fatal.
    issues = data_prep.validate_consistency(elections_long)
    for name, df_issue in issues.items():
        if len(df_issue) > 0:
            LOGGER.warning("%s : %s lignes a inspecter", name, len(df_issue))

    output_dir.mkdir(parents=True, exist_ok=True)
    parquet_path = output_dir / "elections_long.parquet"
    csv_path = output_dir / "elections_long.csv"
    elections_long.to_parquet(parquet_path, index=False)
    elections_long.to_csv(csv_path, sep=";", index=False)
    LOGGER.info("Long format sauvegarde (%s lignes) -> %s / %s", len(elections_long), parquet_path, csv_path)
    return elections_long
|
| 458 |
+
|
| 459 |
+
|
| 460 |
+
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the preprocessing entry point."""
    cli = argparse.ArgumentParser(description="Prétraitement des fichiers bruts en format long standardisé.")
    cli.add_argument(
        "--raw-dir",
        type=Path,
        default=Path("data/raw"),
        help="Répertoire des fichiers bruts CSV.",
    )
    cli.add_argument(
        "--output-dir",
        type=Path,
        default=Path("data/interim"),
        help="Destination du format long harmonisé.",
    )
    cli.add_argument(
        "--meta-config",
        type=Path,
        default=None,
        help="Chemin vers un fichier JSON/YAML décrivant les meta-données des scrutins. Par défaut, utilise la configuration embarquée.",
    )
    return cli.parse_args()
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
def main() -> None:
    """CLI entry point: configure logging, then run the full preprocessing."""
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    cli_args = parse_args()
    resolved_config = load_meta_config(cli_args.meta_config)
    preprocess_all(cli_args.raw_dir, cli_args.output_dir, resolved_config)


if __name__ == "__main__":
    main()
|
src/data_prep.py
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
import re
|
| 5 |
+
from typing import Dict, Iterable, List, Mapping, Optional
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pandas as pd
|
| 9 |
+
|
| 10 |
+
# Columns kept across all scrutins
|
| 11 |
+
# Canonical long-format schema shared by every scrutin.
STANDARD_COLUMNS: List[str] = (
    "code_bv nom_bv annee date_scrutin type_scrutin tour "
    "inscrits votants abstentions blancs nuls exprimes "
    "code_candidature nom_candidature voix"
).split()

# Columns coerced to numeric dtypes during cleaning (see coerce_numeric).
NUMERIC_COLUMNS = "inscrits votants abstentions blancs nuls exprimes voix".split()
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# Mapping of common mojibake sequences (UTF-8 bytes mis-decoded as a
# latin-1/cp1252-family codec) to the intended characters. Applied by
# _normalize_label() as a last-resort textual cleanup after its
# encode/decode repair pass.
# NOTE(review): the last two keys render as the same replacement character
# (U+FFFD) in this view; if they really are the same code point the second
# entry silently overwrites the first in this dict literal — confirm against
# the original file bytes.
_MOJIBAKE_REPLACEMENTS = {
    "é": "é",
    "è": "è",
    "ê": "ê",
    "ë": "ë",
    "Ã ": "à",
    "â": "â",
    "ç": "ç",
    "ù": "ù",
    "û": "û",
    "ï": "ï",
    "ô": "ô",
    "ö": "ö",
    "É": "É",
    "È": "È",
    "Ê": "Ê",
    "Ë": "Ë",
    "À": "À",
    "Â": "Â",
    "Ç": "Ç",
    "�": "°",
    "�": "°",
}
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _normalize_label(label: str) -> str:
    """
    Attempt to repair mojibake in column labels (UTF-8 read as latin-1 or vice versa).

    Also strips any BOM and collapses runs of whitespace. Returns the label
    unchanged (apart from whitespace/BOM cleanup) when no repair applies.
    """
    fixed = label
    try:
        # Classic repair: the text was UTF-8 but was decoded as latin-1.
        fixed = label.encode("latin1").decode("utf-8")
    except (UnicodeEncodeError, UnicodeDecodeError):
        fixed = label
    else:
        if "Â" in fixed:
            fixed = fixed.replace("Â", "")
            try:
                # Alternate path: utf-8 bytes decoded as latin1 then re-decoded
                # NOTE(review): this re-encode/decode appears to undo the repair
                # above for any remaining non-ASCII characters, relying on the
                # replacement table below to finish the job — presumably meant
                # for doubly-encoded labels; confirm against real raw headers.
                fixed = fixed.encode("utf-8").decode("latin1")
            except (UnicodeEncodeError, UnicodeDecodeError):
                pass
    # Last-resort literal substitutions for known mojibake sequences.
    for bad, good in _MOJIBAKE_REPLACEMENTS.items():
        if bad in fixed:
            fixed = fixed.replace(bad, good)
    fixed = fixed.replace("\ufeff", "")  # remove BOM
    fixed = " ".join(fixed.split())  # normalise whitespace
    return fixed
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def _canonical_label(label: str) -> str:
    """
    Return a lowercase, alphanumeric-only version of *label* for fuzzy matching.

    The label is first run through :func:`_normalize_label` so that mojibake
    and stray whitespace do not affect matching.
    """
    # Fixed: removed a redundant function-local `import re` that shadowed the
    # module-level import.
    norm = _normalize_label(label).lower()
    return re.sub(r"[^0-9a-z]", "", norm)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def _unpivot_wide_candidates(df: pd.DataFrame) -> pd.DataFrame:
    """
    Detect wide candidate columns (e.g., 'Voix 1', 'Nuance liste 2') and unpivot to long.

    Returns *df* unchanged when fewer than two complete candidate groups
    (votes + nuance code sharing the same numeric suffix) are found.
    Otherwise produces one row per candidate, with the per-candidate columns
    renamed to the standard 'voix' and 'code_candidature'.

    NOTE(review): every digit-suffixed column is treated as "wide" and
    excluded from the base columns — unmapped ones (e.g. 'Nom 1') are dropped
    from the output entirely.
    """
    pattern = re.compile(r"^(?P<base>.*?)(?:\s+|_)?(?P<idx>\d+)$")
    candidate_map: Dict[str, Dict[str, str]] = {}
    wide_cols: set[str] = set()
    for col in df.columns:
        match = pattern.match(col)
        if not match:
            continue
        wide_cols.add(col)
        base = match.group("base").strip()
        idx = match.group("idx")
        canon = _canonical_label(base)
        field = None
        if canon == "voix":
            field = "voix"
        elif canon in {"nuance", "nuanceliste", "codenuance", "codenuanceducandidat", "codenuanceliste"}:
            field = "code_candidature"
        if field:
            candidate_map.setdefault(idx, {})[field] = col

    # Only keep suffixes that have BOTH a votes column and a nuance column.
    indices = [
        idx for idx, fields in candidate_map.items()
        if {"voix", "code_candidature"}.issubset(fields.keys())
    ]
    if len(indices) <= 1:
        # Zero or one candidate group: not a wide layout, nothing to unpivot.
        return df

    # Fixed: removed an unused `candidate_cols` local computed here.
    base_cols = [c for c in df.columns if c not in wide_cols]
    frames = []
    for idx in sorted(indices, key=int):
        fields = candidate_map[idx]
        sub = df[base_cols + list(fields.values())].copy()
        sub = sub.rename(
            columns={
                fields["voix"]: "voix",
                fields["code_candidature"]: "code_candidature",
            }
        )
        frames.append(sub)
    return pd.concat(frames, ignore_index=True)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def deduplicate_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    If multiple columns end up with the same name after rename/normalization,
    keep the first non-null value across duplicates and drop the extras.

    Returns a new DataFrame; the input is not mutated.

    Fixed: the previous implementation selected duplicates by label
    (``df[name]`` returns a DataFrame when the label is duplicated, so
    ``.fillna`` mis-aligned) and ``df.drop(columns=[name])`` removed *all*
    columns with that label. Positional (``iloc``) indexing avoids both.
    """
    df = df.copy()
    duplicated_names = df.columns[df.columns.duplicated()].unique()
    for name in duplicated_names:
        positions = [i for i, c in enumerate(df.columns) if c == name]
        # Combine left-to-right: first non-null value wins.
        combined = df.iloc[:, positions[0]].copy()
        for pos in positions[1:]:
            combined = combined.fillna(df.iloc[:, pos])
        df.iloc[:, positions[0]] = combined
    # Keep only the first occurrence of each column name.
    df = df.loc[:, ~df.columns.duplicated()]
    return df
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def load_raw(
    path: Path,
    *,
    sep: str = ";",
    encoding: str | Iterable[str] = "cp1252",
    decimal: str = ",",
    dtype: Optional[Mapping[str, str]] = None,
    engine: str = "c",
) -> pd.DataFrame:
    """
    Wrapper around read_csv with encoding fallbacks to mitigate mojibake.

    Tries encodings in order (default: cp1252, utf-8-sig, latin-1) until column
    names no longer contain replacement artefacts (� or Ã), then normalises labels.

    A ParserError with the C engine triggers a retry with the python engine
    and malformed lines skipped. A UnicodeDecodeError moves on to the next
    candidate encoding; if every candidate fails, the last decode error is
    re-raised (or a synthetic one when none was captured).
    """
    # Build the ordered list of candidate encodings; the two stock fallbacks
    # are always appended (deduplicated) after the caller's choices.
    encoding_choices: List[str] = []
    if isinstance(encoding, str):
        encoding_choices.append(encoding)
    else:
        encoding_choices.extend(list(encoding))
    encoding_choices.extend([e for e in ["utf-8-sig", "latin-1"] if e not in encoding_choices])

    last_exc: Optional[Exception] = None
    for enc in encoding_choices:
        try:
            try:
                df = pd.read_csv(
                    path,
                    sep=sep,
                    encoding=enc,
                    decimal=decimal,
                    dtype=dtype,  # type: ignore
                    engine=engine,  # type: ignore
                    low_memory=False,
                )
            except pd.errors.ParserError:
                # Retry with python engine and skip malformed lines (low_memory not supported)
                df = pd.read_csv(
                    path,
                    sep=sep,
                    encoding=enc,
                    decimal=decimal,
                    dtype=dtype,  # type: ignore
                    engine="python",
                    on_bad_lines="skip",
                )
        except UnicodeDecodeError as exc:
            last_exc = exc
            continue

        # Heuristic mojibake check on headers: � (replacement char) or Ã
        # (typical of UTF-8 read as latin-1) means the encoding was wrong.
        bad_cols = any(("�" in col) or ("Ã" in col) for col in df.columns)
        if bad_cols and enc != encoding_choices[-1]:
            # try next encoding candidate
            continue

        # The last candidate is accepted even with artefacts; label repair
        # in _normalize_label is the final line of defence.
        df.columns = [_normalize_label(c) for c in df.columns]
        return df

    if last_exc:
        raise last_exc
    raise UnicodeDecodeError("utf-8", b"", 0, 1, "unable to decode with provided encodings")
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
def ensure_columns(df: pd.DataFrame, required: Iterable[str]) -> pd.DataFrame:
    """
    Add missing columns with NaN placeholders to guarantee downstream compatibility.

    Mutates *df* in place and returns it; existing columns are left untouched.
    """
    missing = [col for col in required if col not in df.columns]
    for col in missing:
        df[col] = np.nan
    return df
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def add_election_metadata(df: pd.DataFrame, meta: Mapping[str, object]) -> pd.DataFrame:
    """
    Attach metadata about the scrutin to each row (mutates *df* in place).

    Required meta keys:
    - type_scrutin
    - tour
    - date_scrutin

    Optional:
    - annee (otherwise derived from date_scrutin)
    """
    scrutin_date = pd.to_datetime(meta["date_scrutin"])  # type: ignore
    df["type_scrutin"] = meta["type_scrutin"]
    df["tour"] = int(meta["tour"])  # type: ignore
    df["date_scrutin"] = scrutin_date
    # Derive the year from the (freshly assigned) date unless provided.
    df["annee"] = meta.get("annee", df["date_scrutin"].dt.year)  # type: ignore
    return df
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
def build_code_bv(df: pd.DataFrame, meta: Mapping[str, object]) -> pd.DataFrame:
    """
    Ensure a code_bv column exists (mutating *df* in place).

    If the column is already present it is only stripped of surrounding
    whitespace. Otherwise meta["code_bv_cols"] names the raw columns to join
    with "-" (purely numeric parts are zero-padded to 3 digits); the names are
    matched against the frame's columns via their canonical form. Raises
    KeyError when a named column is missing, or when neither a code_bv column
    nor code_bv_cols is available.
    """
    if "code_bv" in df.columns:
        df["code_bv"] = df["code_bv"].astype(str).str.strip()
        return df

    source_cols: Optional[List[str]] = meta.get("code_bv_cols")  # type: ignore[arg-type]
    if not source_cols:
        raise KeyError("code_bv not found in dataframe and no code_bv_cols provided in meta.")

    # Resolve requested names against actual headers, ignoring accents/spacing.
    available = {_canonical_label(col): col for col in df.columns}
    resolved: List[str] = []
    for wanted in source_cols:
        key = _canonical_label(wanted)
        if key not in available:
            raise KeyError(f"{wanted!r} not found in columns. Available: {list(df.columns)}")
        resolved.append(available[key])

    def _fmt(value: str) -> str:
        # Zero-pad numeric parts so e.g. "7" and "007" collate identically.
        return value.zfill(3) if value.isdigit() else value

    df["code_bv"] = df[resolved].astype(str).apply(
        lambda row: "-".join(_fmt(v) for v in row), axis=1
    )
    return df
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def coerce_numeric(df: pd.DataFrame, numeric_cols: Iterable[str] = NUMERIC_COLUMNS) -> pd.DataFrame:
    """Coerce the given columns (those present) to numeric, NaN on failure."""
    present = [col for col in numeric_cols if col in df.columns]
    for col in present:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    return df
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def basic_cleaning(df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply harmonisations common to all scrutins.

    - Missing vote counts ('voix') are filled with zero; the column is
      created (all zeros) when absent.
    - 'exprimes' is recomputed as votants - blancs - nuls when missing and
      the three inputs are available.
    - Rows without a 'code_bv' identifier are dropped.

    Returns a cleaned copy; the input frame is not mutated.

    NOTE(review): assumes the exprimes/votants/blancs/nuls columns exist —
    presumably guaranteed by an upstream ensure_columns() call; confirm.
    """
    df = df.copy()
    # Fixed: df.get("voix", 0) returns the scalar 0 (not a Series) when the
    # column is missing, so the previous `.fillna(0)` chained on it raised
    # AttributeError. Branch explicitly instead.
    if "voix" in df.columns:
        df["voix"] = df["voix"].fillna(0)
    else:
        df["voix"] = 0

    # Recompute exprimes when possible
    mask_expr = (
        df["exprimes"].isna()
        & df["votants"].notna()
        & df["blancs"].notna()
        & df["nuls"].notna()
    )
    df.loc[mask_expr, "exprimes"] = (
        df.loc[mask_expr, "votants"] - df.loc[mask_expr, "blancs"] - df.loc[mask_expr, "nuls"]
    )

    # Remove rows without minimal identifiers
    df = df[df["code_bv"].notna()]
    return df
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
def standardize_election(
    path: Path,
    meta: Mapping[str, object],
    *,
    rename_map: Optional[Mapping[str, str]] = None,
    sep: str = ";",
    encoding: str | Iterable[str] = ("cp1252", "utf-8-sig", "latin-1"),
    decimal: str = ",",
    dtype: Optional[Mapping[str, str]] = None,
) -> pd.DataFrame:
    """
    Load and standardise a single raw table to the long format expected downstream.

    Parameters
    ----------
    path : Path
        CSV path to the raw election table.
    meta : Mapping
        Must contain type_scrutin, tour, date_scrutin. Optionally code_bv_cols,
        annee, and tour_column/tours for multi-tour files.
    rename_map : Mapping
        Columns to rename from the raw schema to the standard schema.
    sep, encoding, decimal, dtype
        Forwarded to load_raw; *encoding* may be a sequence of fallbacks.
    """
    df_raw = load_raw(path, sep=sep, encoding=encoding, decimal=decimal, dtype=dtype)
    # Normalise rename keys the same way raw headers are normalised below.
    rename_norm = {_normalize_label(k): v for k, v in (rename_map or {}).items()}

    def _process(df: pd.DataFrame, meta_for_tour: Mapping[str, object]) -> pd.DataFrame:
        # Full normalisation pipeline applied to the rows of a single tour.
        df_local = df.copy()
        df_local.columns = [_normalize_label(c) for c in df_local.columns]
        df_local = _unpivot_wide_candidates(df_local)
        if rename_norm:
            # Match columns on a canonical form (accents/spaces removed),
            # ignoring numeric suffixes added by header deduplication.
            import re

            def canonical_base(label: str) -> str:
                base = _canonical_label(label)
                # Bug fix: r"\\d+$" matched a literal backslash followed by
                # "d"s; r"\d+$" strips a trailing run of digits as intended.
                return re.sub(r"\d+$", "", base)

            rename_by_base = {canonical_base(k): v for k, v in rename_norm.items()}
            rename_using = {}
            for col in df_local.columns:
                base = canonical_base(col)
                if base in rename_by_base:
                    rename_using[col] = rename_by_base[base]
            df_local = df_local.rename(columns=rename_using)
        df_local = deduplicate_columns(df_local)
        df_local = df_local.loc[:, ~df_local.columns.duplicated()]

        df_local = build_code_bv(df_local, meta_for_tour)
        df_local = add_election_metadata(df_local, meta_for_tour)
        df_local = ensure_columns(df_local, STANDARD_COLUMNS)
        df_local = coerce_numeric(df_local)
        df_local = basic_cleaning(df_local)
        # Standard columns first; any extra columns preserved after them.
        ordered_cols = STANDARD_COLUMNS + [col for col in df_local.columns if col not in STANDARD_COLUMNS]
        return df_local[ordered_cols]

    # Multi-tour handling: split on tour_column if provided and "tour" not explicit.
    if meta.get("tour_column") and "tour" not in meta:
        tour_col = _normalize_label(str(meta["tour_column"]))
        if tour_col not in df_raw.columns:
            # Fallback: treat the file as a single tour (1) when the declared
            # tour column is missing.
            meta_single = {k: v for k, v in meta.items() if k != "tour_column"}
            meta_single["tour"] = int(meta.get("tour", 1))
            return _process(df_raw, meta_single)
        tours = meta.get("tours") or sorted(df_raw[tour_col].dropna().unique())
        frames: list[pd.DataFrame] = []
        for tour_val in tours:
            meta_tour = {k: v for k, v in meta.items() if k != "tour_column"}
            meta_tour["tour"] = int(tour_val)
            frames.append(_process(df_raw[df_raw[tour_col] == tour_val], meta_tour))
        if not frames:
            raise RuntimeError(f"Aucun tour détecté pour {path.name}")
        return pd.concat(frames, ignore_index=True)

    return _process(df_raw, meta)
|
| 394 |
+
|
| 395 |
+
|
| 396 |
+
def validate_consistency(df: pd.DataFrame, *, tolerance: float = 0.02) -> Dict[str, pd.DataFrame]:
    """
    Run quick sanity checks on a standardised table.

    Returns a dict keyed by check name; each value holds the (possibly
    empty) subset of rows — or aggregated rows — failing the check.  A key
    is present only when the columns the check needs exist in *df*.
    """
    cols = set(df.columns)
    issues: Dict[str, pd.DataFrame] = {}

    if {"votants", "inscrits"} <= cols:
        issues["votants_gt_inscrits"] = df[df["votants"] > df["inscrits"]]

    if {"exprimes", "blancs", "nuls", "votants"} <= cols:
        balance = df.copy()
        # Relative imbalance between the ballot-count identity members;
        # zero votants is mapped to NaN to avoid division by zero.
        balance["gap"] = (
            balance["exprimes"] + balance["blancs"] + balance["nuls"] - balance["votants"]
        ) / balance["votants"].replace(0, np.nan)
        issues["exprimes_balance_off"] = balance[balance["gap"].abs() > tolerance]

    if {"code_bv", "type_scrutin", "tour", "exprimes", "voix"} <= cols:
        per_bureau = df.groupby(["code_bv", "type_scrutin", "tour"], as_index=False)[["exprimes", "voix"]].sum()
        per_bureau["gap"] = (per_bureau["voix"] - per_bureau["exprimes"]) / per_bureau["exprimes"].replace(0, np.nan)
        issues["sum_voix_vs_exprimes"] = per_bureau[per_bureau["gap"].abs() > tolerance]

    return issues
|
src/database.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Iterable, Optional
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import sqlalchemy as sa
|
| 9 |
+
from sqlalchemy import Column, Date, Float, Integer, MetaData, String, Table
|
| 10 |
+
from sqlalchemy.engine import Engine
|
| 11 |
+
|
| 12 |
+
from .constants import NUMERIC_COLUMNS
|
| 13 |
+
from .pipeline import normalize_bloc
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def get_engine(url: Optional[str] = None) -> Engine:
    """Create a SQLAlchemy engine from *url*, falling back to DATABASE_URL."""
    resolved = url if url else os.getenv("DATABASE_URL")
    if resolved:
        return sa.create_engine(resolved)
    raise RuntimeError("DATABASE_URL is not set. Example: postgresql+psycopg2://user:pass@localhost:5432/elections")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def define_schema(metadata: MetaData) -> Table:
    """Declare the flat ``election_results`` table on *metadata* and return it.

    One row per (bureau, scrutin, tour, bloc): raw counts, derived rates,
    national comparisons, and lagged features.
    """
    return Table(
        "election_results",
        metadata,
        Column("id", Integer, primary_key=True, autoincrement=True),
        # Identification of the polling station and the scrutin.
        Column("code_bv", String(32), index=True, nullable=False),
        Column("nom_bv", String(255)),
        Column("date_scrutin", Date, index=True, nullable=False),
        Column("annee", Integer, index=True, nullable=False),
        Column("type_scrutin", String(32), index=True, nullable=False),
        Column("tour", Integer, nullable=False),
        Column("bloc", String(64), index=True, nullable=False),
        # Raw counts (Float so NaN from the sources can round-trip).
        Column("voix_bloc", Float),
        Column("exprimes", Float),
        Column("inscrits", Float),
        Column("votants", Float),
        Column("blancs", Float),
        Column("nuls", Float),
        # Derived shares/rates at bureau and national level.
        Column("part_bloc", Float),
        Column("part_bloc_national", Float),
        Column("taux_participation_national", Float),
        Column("taux_participation_bv", Float),
        Column("taux_blancs_bv", Float),
        Column("taux_nuls_bv", Float),
        Column("ecart_bloc_vs_national", Float),
        Column("ecart_participation_vs_nat", Float),
        Column("croissance_inscrits_depuis_base", Float),
        # Lagged features (previous comparable scrutin) used by the model.
        Column("part_bloc_lag1", Float),
        Column("ecart_bloc_vs_national_lag1", Float),
        Column("taux_participation_bv_lag1", Float),
        Column("annee_centre", Float),
    )
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def create_schema(engine: Engine) -> None:
    """Create the election_results table if it does not exist yet."""
    md = MetaData()
    define_schema(md)
    md.create_all(engine)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _coerce_numeric(df: pd.DataFrame, numeric_cols: Iterable[str]) -> pd.DataFrame:
|
| 64 |
+
for col in numeric_cols:
|
| 65 |
+
if col in df.columns:
|
| 66 |
+
df[col] = pd.to_numeric(df[col], errors="coerce")
|
| 67 |
+
return df
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def load_processed_to_db(
    processed_path: Path = Path("data/processed/elections_blocs.csv"),
    *,
    engine: Optional[Engine] = None,
    if_exists: str = "replace",
    chunksize: int = 1000,
) -> int:
    """
    Load the processed bloc-level dataset into PostgreSQL.

    Parameters
    ----------
    processed_path : Path
        ;-separated CSV produced by the pipeline.
    engine : Engine, optional
        Existing engine; defaults to one built from DATABASE_URL.
    if_exists : str
        Forwarded to DataFrame.to_sql ("replace" recreates the table).
    chunksize : int
        Rows per INSERT batch.

    Returns the number of rows written.
    """
    engine = engine or get_engine()
    # Idempotent: only creates the table if it does not exist yet.
    create_schema(engine)

    df = pd.read_csv(processed_path, sep=";")
    df["date_scrutin"] = pd.to_datetime(df["date_scrutin"]).dt.date
    if "bloc" in df.columns:
        # Harmonise bloc labels with the rest of the pipeline.
        df["bloc"] = df["bloc"].apply(normalize_bloc)
    df = _coerce_numeric(df, NUMERIC_COLUMNS)

    df.to_sql(
        "election_results",
        engine,
        if_exists=if_exists,
        index=False,
        method="multi",  # multi-row VALUES clauses, fewer round-trips
        chunksize=chunksize,
    )
    return len(df)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def list_bureaux(engine: Engine) -> list[str]:
    """Return every distinct bureau code, sorted ascending."""
    query = sa.text("select distinct code_bv from election_results order by code_bv")
    with engine.connect() as conn:
        rows = conn.execute(query).fetchall()
    return [row[0] for row in rows]
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def fetch_history(engine: Engine, code_bv: str) -> pd.DataFrame:
    """Fetch the full result history of one bureau, ordered by date then bloc."""
    history_sql = sa.text(
        """
        select *
        from election_results
        where code_bv = :code_bv
        order by date_scrutin asc, bloc asc
        """
    )
    bind_params = {"code_bv": code_bv}
    return pd.read_sql(history_sql, engine, params=bind_params)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
# Public API of this module.
__all__ = [
    "create_schema",
    "define_schema",
    "fetch_history",
    "get_engine",
    "list_bureaux",
    "load_processed_to_db",
]


if __name__ == "__main__":
    # Small CLI: always create the schema; optionally bulk-load the CSV.
    import argparse

    parser = argparse.ArgumentParser(description="Initialise la base et charge les résultats.")
    parser.add_argument(
        "--load",
        action="store_true",
        help="Charger data/processed/elections_blocs.csv dans la base (remplace la table).",
    )
    parser.add_argument(
        "--path",
        type=Path,
        default=Path("data/processed/elections_blocs.csv"),
        help="Chemin vers le fichier processe (CSV ; par defaut data/processed/elections_blocs.csv).",
    )
    args = parser.parse_args()

    engine = get_engine()  # requires DATABASE_URL to be set
    create_schema(engine)
    if args.load:
        rows = load_processed_to_db(args.path, engine=engine)
        print(f"{rows} lignes inserees dans election_results.")
    else:
        print("Schema cree. Utilisez --load pour charger les donnees.")
|
src/db/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Database schema and ingestion utilities.
|
| 3 |
+
"""
|
src/db/ingest.py
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Dict, Iterable, Tuple
|
| 7 |
+
|
| 8 |
+
import numpy as np
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import sqlalchemy as sa
|
| 11 |
+
from sqlalchemy.dialects.postgresql import insert
|
| 12 |
+
|
| 13 |
+
from src.constants import CANDIDATE_CATEGORIES
|
| 14 |
+
from src.data import preprocess as preprocess_module
|
| 15 |
+
from src.db.schema import (
|
| 16 |
+
bureaux,
|
| 17 |
+
categories,
|
| 18 |
+
communes,
|
| 19 |
+
create_schema,
|
| 20 |
+
elections,
|
| 21 |
+
get_engine,
|
| 22 |
+
results_local,
|
| 23 |
+
results_national,
|
| 24 |
+
)
|
| 25 |
+
from src.features import build_features
|
| 26 |
+
|
| 27 |
+
LOGGER = logging.getLogger(__name__)
|
| 28 |
+
TARGET_COLS = [f"target_share_{c}" for c in CANDIDATE_CATEGORIES]
|
| 29 |
+
ID_COLS = ["commune_code", "code_bv", "election_type", "election_year", "round", "date_scrutin"]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def load_panel(input_path: Path) -> pd.DataFrame:
    """Load the harmonised panel from *input_path* (parquet or ;-separated CSV)."""
    if not input_path.exists():
        raise FileNotFoundError(f"Dataset panel introuvable : {input_path}")
    is_parquet = input_path.suffix == ".parquet"
    return pd.read_parquet(input_path) if is_parquet else pd.read_csv(input_path, sep=";")
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def ensure_panel_exists(panel_path: Path, elections_long_path: Path, mapping_path: Path) -> pd.DataFrame:
    """Return the panel, rebuilding it from raw data when the file is absent."""
    if not panel_path.exists():
        LOGGER.info("Panel manquant, tentative de reconstruction via preprocess + build_features.")
        # Rebuild the long intermediate first if it is missing too.
        if not elections_long_path.exists():
            preprocess_module.preprocess_all(Path("data/raw"), elections_long_path.parent, preprocess_module.DEFAULT_META_CONFIG)
        build_features.build_panel(elections_long_path, mapping_path, panel_path, csv_output=None)
    return load_panel(panel_path)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def check_mass(panel: pd.DataFrame, tolerance: float = 0.05) -> None:
    """Warn when per-row sums of the target shares drift from 1 beyond *tolerance*."""
    totals = panel[TARGET_COLS].sum(axis=1)
    outside = (totals < (1 - tolerance)) | (totals > (1 + tolerance))
    offenders = panel[outside]
    if len(offenders):
        LOGGER.warning("Somme des parts hors intervalle attendu pour %s lignes (tol=%s).", len(offenders), tolerance)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def melt_panel(panel: pd.DataFrame) -> pd.DataFrame:
    """Unpivot the wide target_share_* columns into long (category, share) rows."""
    id_vars = ID_COLS + ["turnout_pct"]
    melted = panel.melt(id_vars=id_vars, value_vars=TARGET_COLS, var_name="category", value_name="share")
    # Keep only the bare category name (strip the "target_share_" prefix).
    melted["category"] = melted["category"].str.replace("target_share_", "", regex=False)
    return melted
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _upsert_simple(conn, table, rows: Iterable[dict], index_elements: Iterable[str]) -> None:
    """
    Bulk-insert *rows* into *table*, skipping rows that conflict on the
    unique constraint over *index_elements* (ON CONFLICT DO NOTHING).

    Bug fixes vs the original:
    - *rows* is materialised first: a generator was exhausted by
      ``list(rows)`` yet ``if rows:`` on the generator object stayed
      truthy, so an empty INSERT was executed;
    - for an empty input no statement is built at all
      (``insert().values([])`` raises in SQLAlchemy).
    """
    materialized = list(rows)
    if not materialized:
        return
    stmt = insert(table).values(materialized)
    stmt = stmt.on_conflict_do_nothing(index_elements=list(index_elements))
    conn.execute(stmt)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def ingest(panel: pd.DataFrame, engine) -> None:
    """
    Push the harmonised panel into the normalised PostgreSQL schema.

    Order matters: referential tables (categories, communes, bureaux,
    elections) are upserted first so their generated ids can be resolved,
    then the per-bureau results and the national aggregates are upserted.
    Everything runs inside a single transaction (``engine.begin()``).
    """
    check_mass(panel)  # warns (does not fail) on share sums far from 1
    panel = panel.copy()
    panel["round"] = panel["round"].fillna(1).astype(int)
    panel["date_scrutin"] = pd.to_datetime(panel["date_scrutin"]).dt.date

    # Wide target_share_* columns -> long (category, share) rows, in percent.
    long_df = melt_panel(panel)
    long_df = long_df[long_df["category"].isin(CANDIDATE_CATEGORIES)]
    long_df["share_pct"] = (long_df["share"].astype(float) * 100).round(6)

    with engine.begin() as conn:
        create_schema(conn)
        LOGGER.info("Schéma vérifié.")

        # Referential: categories, then name -> id map.
        _upsert_simple(conn, categories, [{"name": cat} for cat in CANDIDATE_CATEGORIES], ["name"])
        cat_map = dict(conn.execute(sa.select(categories.c.name, categories.c.id)))

        # Referential: communes (the commune code doubles as INSEE code here).
        commune_rows = [
            {"name_normalized": code, "insee_code": code}
            for code in sorted(long_df["commune_code"].dropna().unique())
        ]
        _upsert_simple(conn, communes, commune_rows, ["insee_code"])
        commune_map = dict(conn.execute(sa.select(communes.c.insee_code, communes.c.id)))

        def bureau_code_only(code_bv: str) -> str:
            # "commune-bureau" composite code -> keep only the bureau part.
            if "-" in str(code_bv):
                parts = str(code_bv).split("-", 1)
                return parts[1]
            return str(code_bv)

        # Referential: bureaux, unique per (commune_id, bureau_code).
        bureau_rows = []
        for _, row in long_df.drop_duplicates(subset=["commune_code", "code_bv"]).iterrows():
            commune_id = commune_map.get(row["commune_code"])
            if commune_id is None:
                continue
            bureau_rows.append(
                {
                    "commune_id": commune_id,
                    "bureau_code": bureau_code_only(row["code_bv"]),
                    "bureau_label": None,
                }
            )
        _upsert_simple(conn, bureaux, bureau_rows, ["commune_id", "bureau_code"])
        bureau_map = {
            (commune_id, bureau_code): bureau_id
            for bureau_id, commune_id, bureau_code in conn.execute(
                sa.select(bureaux.c.id, bureaux.c.commune_id, bureaux.c.bureau_code)
            )
        }

        # Referential: elections, unique per (type, year, round).
        election_rows = []
        for _, row in panel.drop_duplicates(subset=["election_type", "election_year", "round"]).iterrows():
            election_rows.append(
                {
                    "election_type": row["election_type"],
                    "election_year": int(row["election_year"]),
                    "round": int(row["round"]) if not pd.isna(row["round"]) else None,
                    "date": row["date_scrutin"],
                }
            )
        _upsert_simple(conn, elections, election_rows, ["election_type", "election_year", "round"])
        # NULL round is mapped to 1 so the lookups below always use int keys.
        election_map: Dict[Tuple[str, int, int], int] = {
            (etype, year, int(round_) if round_ is not None else 1): eid
            for eid, etype, year, round_ in conn.execute(
                sa.select(elections.c.id, elections.c.election_type, elections.c.election_year, elections.c.round)
            )
        }

        # Facts: per-bureau shares; rows whose referentials cannot be
        # resolved are silently skipped.
        local_rows = []
        for row in long_df.itertuples(index=False):
            commune_id = commune_map.get(row.commune_code)
            if commune_id is None:
                continue
            bureau_id = bureau_map.get((commune_id, bureau_code_only(row.code_bv)))
            election_id = election_map.get((row.election_type, int(row.election_year), int(row.round)))
            category_id = cat_map.get(row.category)
            if None in (bureau_id, election_id, category_id):
                continue
            turnout_pct = None if pd.isna(row.turnout_pct) else float(row.turnout_pct) * 100
            local_rows.append(
                {
                    "bureau_id": bureau_id,
                    "election_id": election_id,
                    "category_id": category_id,
                    "share_pct": None if pd.isna(row.share_pct) else float(row.share_pct),
                    "votes": None,
                    "expressed": None,
                    "turnout_pct": turnout_pct,
                }
            )
        if local_rows:
            # Upsert: re-running the ingestion overwrites the measures.
            stmt = insert(results_local).values(local_rows)
            stmt = stmt.on_conflict_do_update(
                index_elements=["bureau_id", "election_id", "category_id"],
                set_={
                    "share_pct": stmt.excluded.share_pct,
                    "votes": stmt.excluded.votes,
                    "expressed": stmt.excluded.expressed,
                    "turnout_pct": stmt.excluded.turnout_pct,
                },
            )
            conn.execute(stmt)
            LOGGER.info("Résultats locaux insérés/mis à jour : %s lignes", len(local_rows))

        # "National" reference built as the unweighted mean over bureaux.
        # NOTE(review): this is an approximation — not weighted by expressed
        # votes; confirm this is intended.
        nat_rows = []
        nat = (
            long_df.groupby(["election_type", "election_year", "round", "category"], as_index=False)
            .agg(share=("share_pct", "mean"))
            .rename(columns={"share": "share_pct"})
        )
        # Mean turnout per scrutin.
        turnout_nat = panel.groupby(["election_type", "election_year", "round"], as_index=False)["turnout_pct"].mean()
        nat = nat.merge(turnout_nat, on=["election_type", "election_year", "round"], how="left")

        for row in nat.itertuples(index=False):
            election_id = election_map.get((row.election_type, int(row.election_year), int(row.round)))
            category_id = cat_map.get(row.category)
            if None in (election_id, category_id):
                continue
            nat_rows.append(
                {
                    "election_id": election_id,
                    "category_id": category_id,
                    "share_pct": None if pd.isna(row.share_pct) else float(row.share_pct),
                    "votes": None,
                    "expressed": None,
                    "turnout_pct": None if pd.isna(row.turnout_pct) else float(row.turnout_pct * 100),
                }
            )
        if nat_rows:
            stmt = insert(results_national).values(nat_rows)
            stmt = stmt.on_conflict_do_update(
                index_elements=["election_id", "category_id"],
                set_={
                    "share_pct": stmt.excluded.share_pct,
                    "votes": stmt.excluded.votes,
                    "expressed": stmt.excluded.expressed,
                    "turnout_pct": stmt.excluded.turnout_pct,
                },
            )
            conn.execute(stmt)
            LOGGER.info("Référentiels nationaux insérés/mis à jour : %s lignes", len(nat_rows))
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the ingestion script."""
    parser = argparse.ArgumentParser(description="Ingestion du panel harmonisé dans PostgreSQL.")
    # (flag, default path, help text)
    path_options = [
        ("--input", "data/processed/panel.parquet", "Chemin vers le panel parquet."),
        ("--elections-long", "data/interim/elections_long.parquet", "Format long (fallback pour reconstruire le panel)."),
        ("--mapping", "data/mapping_candidats_blocs.csv", "Mapping nuance -> catégorie (fallback)."),
    ]
    for flag, default, help_text in path_options:
        parser.add_argument(flag, type=Path, default=Path(default), help=help_text)
    return parser.parse_args()
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
def main() -> None:
    """Script entry point: load (or rebuild) the panel, then push it to PostgreSQL."""
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    cli = parse_args()
    panel = ensure_panel_exists(cli.input, cli.elections_long, cli.mapping)
    ingest(panel, get_engine())


if __name__ == "__main__":
    main()
|
src/db/schema.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from typing import Optional
|
| 5 |
+
|
| 6 |
+
import sqlalchemy as sa
|
| 7 |
+
from sqlalchemy import Column, Date, Float, ForeignKey, Integer, MetaData, String, Table, UniqueConstraint
|
| 8 |
+
from sqlalchemy.engine import Engine
|
| 9 |
+
|
| 10 |
+
# Single shared MetaData: create_schema() materialises every table
# declared below with one create_all().
metadata = MetaData()

# Referential: one row per commune; the INSEE code is the natural key.
communes = Table(
    "communes",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("name_normalized", String(255), nullable=True),
    Column("insee_code", String(12), nullable=False, unique=True, index=True),
)

# Referential: polling stations, unique per (commune, bureau_code).
bureaux = Table(
    "bureaux",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("commune_id", Integer, ForeignKey("communes.id"), nullable=False),
    Column("bureau_code", String(32), nullable=False),
    Column("bureau_label", String(255), nullable=True),
    UniqueConstraint("commune_id", "bureau_code", name="uq_bureau_commune_code"),
)

# Referential: one row per scrutin (type, year, round).
elections = Table(
    "elections",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("election_type", String(32), nullable=False),
    Column("election_year", Integer, nullable=False),
    Column("round", Integer, nullable=True),
    Column("date", Date, nullable=True),
    UniqueConstraint("election_type", "election_year", "round", name="uq_election_unique"),
)

# Referential: political categories (closed set of unique names).
categories = Table(
    "categories",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("name", String(64), nullable=False, unique=True),
)

# Facts: per-bureau result for one (election, category).
results_local = Table(
    "results_local",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("bureau_id", Integer, ForeignKey("bureaux.id"), nullable=False),
    Column("election_id", Integer, ForeignKey("elections.id"), nullable=False),
    Column("category_id", Integer, ForeignKey("categories.id"), nullable=False),
    Column("share_pct", Float, nullable=True),
    Column("votes", Float, nullable=True),
    Column("expressed", Float, nullable=True),
    Column("turnout_pct", Float, nullable=True),
    UniqueConstraint("bureau_id", "election_id", "category_id", name="uq_local_bureau_election_category"),
)

# Facts: national aggregate for one (election, category).
results_national = Table(
    "results_national",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("election_id", Integer, ForeignKey("elections.id"), nullable=False),
    Column("category_id", Integer, ForeignKey("categories.id"), nullable=False),
    Column("share_pct", Float, nullable=True),
    Column("votes", Float, nullable=True),
    Column("expressed", Float, nullable=True),
    Column("turnout_pct", Float, nullable=True),
    UniqueConstraint("election_id", "category_id", name="uq_nat_election_category"),
)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def _build_url_from_env() -> Optional[str]:
|
| 77 |
+
user = os.getenv("DB_USER") or os.getenv("POSTGRES_USER")
|
| 78 |
+
password = os.getenv("DB_PASSWORD") or os.getenv("POSTGRES_PASSWORD")
|
| 79 |
+
host = os.getenv("DB_HOST", "localhost")
|
| 80 |
+
port = os.getenv("DB_PORT", os.getenv("POSTGRES_PORT", "5432"))
|
| 81 |
+
db_name = os.getenv("DB_NAME") or os.getenv("POSTGRES_DB")
|
| 82 |
+
if user and password and db_name:
|
| 83 |
+
return f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{db_name}"
|
| 84 |
+
return None
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def get_engine(url: Optional[str] = None) -> Engine:
    """Build an engine from *url*, DATABASE_URL, or the DB_*/POSTGRES_* variables."""
    db_url = url or os.getenv("DATABASE_URL")
    if not db_url:
        db_url = _build_url_from_env()
    if not db_url:
        raise RuntimeError("DATABASE_URL or DB_* env vars must be set.")
    return sa.create_engine(db_url)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def create_schema(engine: Engine) -> None:
    """Create every table declared on the module-level metadata (idempotent)."""
    metadata.create_all(bind=engine)
|
src/features/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Feature engineering subpackage.
|
| 3 |
+
"""
|
src/features/build_features.py
ADDED
|
@@ -0,0 +1,570 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import re
|
| 6 |
+
import unicodedata
|
| 7 |
+
from functools import reduce
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Dict, Iterable, List
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
import pandas as pd
|
| 13 |
+
|
| 14 |
+
from src.constants import CANDIDATE_CATEGORIES
|
| 15 |
+
|
| 16 |
+
LOGGER = logging.getLogger(__name__)

# Columns that identify one (bureau de vote, scrutin) event; used throughout
# as groupby / merge / pivot keys.
INDEX_COLS = [
    "commune_code",
    "code_bv",
    "election_type",
    "election_year",
    "round",
    "date_scrutin",
]

# Fallback for presidential ballots with no mapped bloc: normalised candidate
# surname (accents/hyphens stripped by _normalize_person_name) -> category.
PRESIDENTIAL_NAME_TO_CATEGORY = {
    "arthaud": "extreme_gauche",
    "poutou": "extreme_gauche",
    "melenchon": "gauche_dure",
    "roussel": "gauche_dure",
    "hidalgo": "gauche_modere",
    "jadot": "gauche_modere",
    "hamon": "gauche_modere",
    "macron": "centre",
    "lassalle": "centre",
    "cheminade": "centre",
    "pecresse": "droite_modere",
    "fillon": "droite_modere",
    "dupontaignan": "droite_dure",
    "asselineau": "droite_dure",
    "lepen": "extreme_droite",
    "zemmour": "extreme_droite",
}

# Fallback keyword matching for European-election list names. Checked in
# order: the FIRST keyword found in the normalised list name wins, so more
# specific keywords must stay above more generic ones (e.g. "lutteouvriere"
# before "communiste").
EUROPEAN_LIST_KEYWORDS: list[tuple[str, str]] = [
    ("rassemblementnational", "extreme_droite"),
    ("lepen", "extreme_droite"),
    ("republiqueenmarche", "centre"),
    ("renaissance", "centre"),
    ("modem", "centre"),
    ("franceinsoumise", "gauche_dure"),
    ("lutteouvriere", "extreme_gauche"),
    ("revolutionnairecommunistes", "extreme_gauche"),
    ("communiste", "gauche_dure"),
    ("deboutlafrance", "droite_dure"),
    ("dupontaignan", "droite_dure"),
    ("frexit", "droite_dure"),
    ("patriotes", "droite_dure"),
    ("uniondeladroite", "droite_modere"),
    ("droiteetducentre", "droite_modere"),
    ("printempseuropeen", "gauche_modere"),
    ("generation", "gauche_modere"),
    ("animaliste", "gauche_modere"),
    ("ecolog", "gauche_modere"),
    ("federaliste", "centre"),
    ("pirate", "centre"),
    ("citoyenseuropeens", "centre"),
    ("leseuropeens", "centre"),
    ("lesoubliesdeleurope", "centre"),
    ("initiativecitoyenne", "centre"),
    ("esperanto", "centre"),
    ("europeauservicedespeuples", "droite_dure"),
    ("franceroyale", "extreme_droite"),
    ("pourleuropedesgens", "gauche_dure"),
    ("allonsenfants", "droite_modere"),
    ("alliancejaune", "centre"),
    ("giletsjaunes", "centre"),
]
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def normalize_category(label: str | None) -> str | None:
    """Map a free-form bloc label onto one of CANDIDATE_CATEGORIES, or None."""
    if label is None:
        return None
    key = str(label).strip().lower().replace(" ", "_").replace("-", "_")
    # Known spelling variants and coarse labels folded onto canonical names;
    # "divers" is deliberately discarded (no category).
    aliases = {
        "doite_dure": "droite_dure",
        "droite_moderee": "droite_modere",
        "gauche_moderee": "gauche_modere",
        "extreme_gauche": "extreme_gauche",
        "extreme_droite": "extreme_droite",
        "divers": None,
        "gauche": "gauche_modere",
        "droite": "droite_modere",
    }
    candidate = aliases.get(key, key)
    return candidate if candidate in CANDIDATE_CATEGORIES else None
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def _normalize_code_series(series: pd.Series) -> pd.Series:
|
| 103 |
+
return (
|
| 104 |
+
series.astype("string")
|
| 105 |
+
.str.strip()
|
| 106 |
+
.str.upper()
|
| 107 |
+
.replace({"NAN": pd.NA, "NONE": pd.NA, "": pd.NA, "<NA>": pd.NA})
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def _normalize_person_name(value: str | None) -> str:
|
| 112 |
+
if value is None:
|
| 113 |
+
return ""
|
| 114 |
+
text = str(value).strip().lower()
|
| 115 |
+
if not text:
|
| 116 |
+
return ""
|
| 117 |
+
text = unicodedata.normalize("NFD", text)
|
| 118 |
+
text = "".join(ch for ch in text if unicodedata.category(ch) != "Mn")
|
| 119 |
+
return re.sub(r"[^a-z]", "", text)
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def _category_from_name(name: str | None) -> str | None:
    """Guess a presidential candidate's category from their normalised name."""
    normalized = _normalize_person_name(name)
    if not normalized:
        return None
    # First surname key contained in the normalised name wins (insertion order).
    return next(
        (cat for key, cat in PRESIDENTIAL_NAME_TO_CATEGORY.items() if key in normalized),
        None,
    )
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def _category_from_list_name(name: str | None) -> str | None:
    """Guess a European-election list's category from keywords in its name."""
    normalized = _normalize_person_name(name)
    if not normalized:
        return None
    # EUROPEAN_LIST_KEYWORDS is ordered: the first matching keyword wins.
    return next(
        (cat for key, cat in EUROPEAN_LIST_KEYWORDS if key in normalized),
        None,
    )
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def load_elections_long(path: Path, commune_code: str | None = None) -> pd.DataFrame:
    """Load the harmonised long-format election file and normalise its columns.

    Reads parquet (by suffix) or ';'-separated CSV, coerces numeric columns,
    normalises candidature and commune codes, optionally filters to a single
    commune, then unpivots any remaining wide "Voix N" candidate columns.

    Raises:
        FileNotFoundError: if *path* does not exist.
    """
    if not path.exists():
        raise FileNotFoundError(f"Fichier long introuvable : {path}")
    if path.suffix == ".parquet":
        df = pd.read_parquet(path)
    else:
        df = pd.read_csv(path, sep=";")
    df["date_scrutin"] = pd.to_datetime(df["date_scrutin"])
    # Missing/invalid years fall back to the year of the scrutin date.
    df["annee"] = pd.to_numeric(df["annee"], errors="coerce").fillna(df["date_scrutin"].dt.year)
    df["election_year"] = df["annee"]
    df["tour"] = pd.to_numeric(df["tour"], errors="coerce")
    df["round"] = df["tour"]
    for col in ["exprimes", "votants", "inscrits", "voix", "blancs", "nuls"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
    if "code_candidature" in df.columns:
        df["code_candidature"] = _normalize_code_series(df["code_candidature"])
    if "code_commune" in df.columns:
        # Strip a trailing ".0" left over from float-parsed INSEE codes.
        df["code_commune"] = (
            df["code_commune"]
            .astype(str)
            .str.strip()
            .str.replace(r"\.0$", "", regex=True)
        )
    else:
        # NOTE(review): assumes code_bv is "<commune>-<bureau>" — confirm format.
        df["code_commune"] = df["code_bv"].astype(str).str.split("-").str[0]
    if commune_code is not None:
        df = df[df["code_commune"].astype(str) == str(commune_code)].copy()
    df = _unpivot_wide_candidates(df)
    # Unpivoting may have introduced fresh, un-normalised codes.
    if "code_candidature" in df.columns:
        df["code_candidature"] = _normalize_code_series(df["code_candidature"])
    df["type_scrutin"] = df["type_scrutin"].str.lower()
    df["election_type"] = df["type_scrutin"]
    return df
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def _unpivot_wide_candidates(df: pd.DataFrame) -> pd.DataFrame:
    """Melt official wide exports ("Voix 1", "Code Nuance 1", ...) into long rows.

    Rows with any suffixed "Voix N" value are exploded into one row per
    candidate slot; rows without them are kept as-is, after backfilling the
    unsuffixed columns ("Voix", "Code Nuance", "Nom"/"Prénom") into the
    harmonised voix / code_candidature / nom_candidature columns.
    Zero-vote / NaN candidate slots are dropped from the exploded part.
    """
    df = df.copy()
    voix_cols = [c for c in df.columns if re.match(r"^Voix \d+$", str(c))]
    if not voix_cols:
        return df
    wide_mask = df[voix_cols].notna().any(axis=1)

    def _fill_unsuffixed_rows(local: pd.DataFrame) -> pd.DataFrame:
        # Some datasets only expose unsuffixed columns (Voix, Code Nuance).
        if "voix" in local.columns and "Voix" in local.columns:
            missing_voix = local["voix"].isna() | (local["voix"] == 0)
            local.loc[missing_voix, "voix"] = pd.to_numeric(
                local.loc[missing_voix, "Voix"],
                errors="coerce",
            )
        if "code_candidature" in local.columns:
            if "Code Nuance" in local.columns:
                local["code_candidature"] = local["code_candidature"].fillna(local["Code Nuance"])
            if "Nuance" in local.columns:
                local["code_candidature"] = local["code_candidature"].fillna(local["Nuance"])
        if "nom_candidature" in local.columns:
            if "Nom" in local.columns and "Prénom" in local.columns:
                prenom = local["Prénom"].fillna("").astype(str).str.strip()
                nom = local["Nom"].fillna("").astype(str).str.strip()
                combined = (prenom + " " + nom).str.strip().replace("", pd.NA)
                local["nom_candidature"] = local["nom_candidature"].fillna(combined)
            elif "Nom" in local.columns:
                local["nom_candidature"] = local["nom_candidature"].fillna(local["Nom"])
        return local

    if not wide_mask.any():
        return _fill_unsuffixed_rows(df)

    def _indexed_cols(pattern: str) -> Dict[int, str]:
        # Map the captured integer suffix -> original column name.
        mapping: Dict[int, str] = {}
        for col in df.columns:
            match = re.match(pattern, str(col))
            if match:
                mapping[int(match.group(1))] = col
        return mapping

    voice_map = _indexed_cols(r"^Voix (\d+)$")
    code_map = _indexed_cols(r"^Code Nuance (\d+)$")
    nuance_map = _indexed_cols(r"^Nuance (\d+)$")
    # "Nuance N" is only a fallback when "Code Nuance N" is absent.
    for idx, col in nuance_map.items():
        code_map.setdefault(idx, col)
    # Slot 1 may also come from the already-harmonised unsuffixed columns.
    if "voix" in df.columns:
        voice_map.setdefault(1, "voix")
    if "code_candidature" in df.columns:
        code_map.setdefault(1, "code_candidature")

    if not any(idx > 1 for idx in voice_map):
        return df

    # Base columns: everything that is neither a suffixed candidate column
    # nor one of the harmonised per-candidate columns being rebuilt.
    drop_cols = {c for c in df.columns if re.search(r"\s\d+$", str(c))}
    drop_cols.update({"voix", "code_candidature", "nom_candidature"})
    base_cols = [c for c in df.columns if c not in drop_cols]

    df_long = _fill_unsuffixed_rows(df[~wide_mask].copy())
    df_wide = df[wide_mask].copy()
    frames = []

    def _compose_nom(idx: int) -> pd.Series | None:
        # Candidate/list name preference order: extended list label, short
        # label, "Prénom Nom", bare Nom or Prénom, then (slot 1 only) the
        # pre-existing harmonised column. Returns None when nothing is known.
        series = pd.Series(pd.NA, index=df_wide.index, dtype="string")
        etendu_col = f"Libellé Etendu Liste {idx}"
        abrege_col = f"Libellé Abrégé Liste {idx}"
        nom_col = f"Nom {idx}"
        prenom_col = f"Prénom {idx}"

        if etendu_col in df_wide.columns:
            series = series.fillna(df_wide[etendu_col].astype("string"))
        if abrege_col in df_wide.columns:
            series = series.fillna(df_wide[abrege_col].astype("string"))
        if nom_col in df_wide.columns and prenom_col in df_wide.columns:
            prenom = df_wide[prenom_col].fillna("").astype(str).str.strip()
            nom = df_wide[nom_col].fillna("").astype(str).str.strip()
            combined = (prenom + " " + nom).str.strip().replace("", pd.NA)
            series = series.fillna(combined)
        elif nom_col in df_wide.columns:
            series = series.fillna(df_wide[nom_col].astype("string"))
        elif prenom_col in df_wide.columns:
            series = series.fillna(df_wide[prenom_col].astype("string"))
        if idx == 1 and "nom_candidature" in df_wide.columns:
            series = series.fillna(df_wide["nom_candidature"].astype("string"))
        if series.isna().all():
            return None
        return series

    for idx in sorted(voice_map):
        voix_col = voice_map[idx]
        if voix_col not in df_wide.columns:
            continue
        temp = df_wide[base_cols].copy()
        temp["voix"] = df_wide[voix_col]
        code_candidates = []
        if idx in code_map:
            code_candidates.append(code_map[idx])
        if idx in nuance_map and nuance_map[idx] not in code_candidates:
            code_candidates.append(nuance_map[idx])
        code_series = pd.Series(pd.NA, index=df_wide.index, dtype="string")
        for candidate in code_candidates:
            if candidate in df_wide.columns:
                code_series = code_series.fillna(df_wide[candidate])
        temp["code_candidature"] = code_series
        nom_series = _compose_nom(idx)
        if nom_series is not None:
            temp["nom_candidature"] = nom_series
        frames.append(temp)

    if not frames:
        return df
    wide_long = pd.concat(frames, ignore_index=True)
    wide_long["voix"] = pd.to_numeric(wide_long["voix"], errors="coerce")
    # Empty candidate slots (NaN or 0 votes) carry no information — drop them.
    wide_long = wide_long[wide_long["voix"].notna() & (wide_long["voix"] > 0)]
    return pd.concat([df_long, wide_long], ignore_index=True)
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def _mapping_from_yaml(mapping_path: Path) -> pd.DataFrame:
    """Build the candidature -> bloc mapping DataFrame from a YAML config.

    The YAML may provide inline "mapping" entries, point at a "base_mapping"
    CSV (path resolved relative to the YAML file when not absolute), and/or
    list "overrides" that update existing rows (matched by code_candidature)
    or append new ones.

    Raises:
        RuntimeError: if PyYAML is not installed.
        ValueError: if the YAML root is not a dict.
    """
    try:
        import yaml
    except Exception as exc:
        raise RuntimeError("PyYAML est requis pour charger un mapping YAML.") from exc
    raw = yaml.safe_load(mapping_path.read_text()) or {}
    if not isinstance(raw, dict):
        raise ValueError("Mapping YAML invalide: attendu un dictionnaire.")

    base_mapping = raw.get("base_mapping")
    mapping_entries = raw.get("mapping")
    overrides = raw.get("overrides", [])

    mapping = pd.DataFrame()
    if mapping_entries:
        mapping = pd.DataFrame(mapping_entries)
    elif base_mapping:
        base_path = Path(base_mapping)
        if not base_path.is_absolute():
            base_path = mapping_path.parent / base_path
        mapping = pd.read_csv(base_path, sep=";")
    else:
        mapping = pd.DataFrame(columns=["code_candidature", "nom_candidature", "bloc_1", "bloc_2", "bloc_3"])

    if overrides:
        override_df = pd.DataFrame(overrides)
        if not override_df.empty:
            if "blocs" in override_df.columns:
                # A "blocs" list is split into up to three bloc_N columns.
                blocs = override_df["blocs"].apply(lambda v: v if isinstance(v, list) else [])
                override_df["bloc_1"] = blocs.apply(lambda v: v[0] if len(v) > 0 else None)
                override_df["bloc_2"] = blocs.apply(lambda v: v[1] if len(v) > 1 else None)
                override_df["bloc_3"] = blocs.apply(lambda v: v[2] if len(v) > 2 else None)
                override_df = override_df.drop(columns=["blocs"])
            if "code_candidature" not in override_df.columns and "code" in override_df.columns:
                override_df = override_df.rename(columns={"code": "code_candidature"})
            if "nom_candidature" not in override_df.columns and "nom" in override_df.columns:
                override_df = override_df.rename(columns={"nom": "nom_candidature"})

            # Normalise codes on both sides so override matching is reliable.
            if "code_candidature" in mapping.columns:
                mapping["code_candidature"] = _normalize_code_series(mapping["code_candidature"])
            if "code_candidature" in override_df.columns:
                override_df["code_candidature"] = _normalize_code_series(override_df["code_candidature"])

            mapping = mapping.copy()
            for _, row in override_df.iterrows():
                code = row.get("code_candidature")
                if code is None:
                    continue
                # NOTE(review): assumes the base mapping has a code_candidature
                # column whenever overrides are present — verify for inline
                # "mapping" entries lacking that key.
                mask = mapping["code_candidature"] == code
                if mask.any():
                    for col in ["nom_candidature", "bloc_1", "bloc_2", "bloc_3"]:
                        if col in row and pd.notna(row[col]):
                            mapping.loc[mask, col] = row[col]
                else:
                    mapping = pd.concat([mapping, pd.DataFrame([row])], ignore_index=True)
    return mapping
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
def load_mapping(mapping_path: Path) -> pd.DataFrame:
    """Load the candidature -> bloc mapping from YAML or ';'-separated CSV."""
    if not mapping_path.exists():
        raise FileNotFoundError(f"Mapping candidats/blocs manquant : {mapping_path}")
    if mapping_path.suffix in {".yml", ".yaml"}:
        mapping = _mapping_from_yaml(mapping_path)
    else:
        mapping = pd.read_csv(mapping_path, sep=";")
    if "code_candidature" in mapping.columns:
        mapping["code_candidature"] = _normalize_code_series(mapping["code_candidature"])
    # Collapse every bloc_* column onto the canonical category names.
    bloc_columns = [name for name in mapping.columns if name.startswith("bloc")]
    for bloc_column in bloc_columns:
        mapping[bloc_column] = mapping[bloc_column].apply(normalize_category)
    return mapping
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
def expand_by_category(elections_long: pd.DataFrame, mapping: pd.DataFrame) -> pd.DataFrame:
    """Explode each candidature row into one record per mapped bloc/category.

    Votes are split evenly across a candidature's blocs (bloc_1..bloc_3).
    When no bloc is mapped, presidential rows fall back to candidate-name
    matching and European rows to list-name keywords; anything still
    unmapped is assigned to "centre" so no vote mass is lost.
    """
    df = elections_long.merge(mapping, on="code_candidature", how="left", suffixes=("", "_map"))
    records: list[dict] = []
    for row in df.itertuples(index=False):
        blocs = [getattr(row, col, None) for col in ["bloc_1", "bloc_2", "bloc_3"]]
        # NOTE(review): this condition also lets non-str, non-None values (e.g.
        # NaN) through; normalize_category rejects them so the next line filters
        # them out — confirm that is the intent.
        blocs = [normalize_category(b) for b in blocs if isinstance(b, str) or b is not None]
        blocs = [b for b in blocs if b is not None]
        voix = getattr(row, "voix", 0) or 0
        exprimes = getattr(row, "exprimes", np.nan)
        votants = getattr(row, "votants", np.nan)
        inscrits = getattr(row, "inscrits", np.nan)
        blancs = getattr(row, "blancs", np.nan)
        nuls = getattr(row, "nuls", np.nan)
        if not blocs:
            election_type = getattr(row, "election_type", None)
            if election_type == "presidentielles":
                nom = getattr(row, "nom_candidature", None)
                mapped = _category_from_name(nom)
                if mapped:
                    blocs = [mapped]
            elif election_type == "europeennes":
                nom = getattr(row, "nom_candidature", None)
                mapped = _category_from_list_name(nom)
                if mapped:
                    blocs = [mapped]
        if not blocs:
            # Fallback explicite : non mappé -> centre (évite un panel vide)
            blocs = ["centre"]
        # Split the raw vote count evenly across the candidature's blocs.
        part = voix / len(blocs) if len(blocs) > 0 else 0
        for bloc in blocs:
            records.append(
                {
                    "commune_code": getattr(row, "code_commune"),
                    "code_bv": getattr(row, "code_bv"),
                    "election_type": getattr(row, "election_type"),
                    "election_year": int(getattr(row, "election_year")),
                    "round": int(getattr(row, "round")) if not pd.isna(getattr(row, "round")) else None,
                    "date_scrutin": getattr(row, "date_scrutin"),
                    "category": bloc,
                    "voix_cat": part,
                    "exprimes": exprimes,
                    "votants": votants,
                    "inscrits": inscrits,
                    "blancs": blancs,
                    "nuls": nuls,
                }
            )
    return pd.DataFrame.from_records(records)
|
| 416 |
+
|
| 417 |
+
|
| 418 |
+
def aggregate_by_event(df: pd.DataFrame) -> pd.DataFrame:
    """Sum category votes per (bureau, scrutin) event and derive ratio columns."""
    keys = INDEX_COLS + ["category"]
    aggregated = df.groupby(keys, as_index=False).agg(
        voix_cat=("voix_cat", "sum"),
        exprimes=("exprimes", "max"),
        votants=("votants", "max"),
        inscrits=("inscrits", "max"),
        blancs=("blancs", "max"),
        nuls=("nuls", "max"),
    )
    # A zero denominator becomes NaN so the ratios never divide by zero.
    exprimes_safe = aggregated["exprimes"].replace(0, np.nan)
    inscrits_safe = aggregated["inscrits"].replace(0, np.nan)
    aggregated["share"] = aggregated["voix_cat"] / exprimes_safe
    aggregated["turnout_pct"] = aggregated["votants"] / inscrits_safe
    aggregated["blancs_pct"] = aggregated["blancs"] / inscrits_safe
    aggregated["nuls_pct"] = aggregated["nuls"] / inscrits_safe
    return aggregated
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
def compute_national_reference(local: pd.DataFrame) -> pd.DataFrame:
    """Aggregate bureau-level results into per-scrutin "national" reference rates."""
    keys = ["election_type", "election_year", "round", "category"]
    nat = local.groupby(keys, as_index=False).agg(
        voix_cat=("voix_cat", "sum"),
        exprimes=("exprimes", "sum"),
        votants=("votants", "sum"),
        inscrits=("inscrits", "sum"),
    )
    # Guard both ratios against a zero denominator (result becomes NaN).
    nat["share_nat"] = nat["voix_cat"] / nat["exprimes"].replace(0, np.nan)
    nat["turnout_nat"] = nat["votants"] / nat["inscrits"].replace(0, np.nan)
    return nat[keys + ["share_nat", "turnout_nat"]]
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
def add_lags(local: pd.DataFrame) -> pd.DataFrame:
    """Attach lagged shares and national-deviation columns per bureau+category.

    Rows are ordered chronologically first so every shift(1)/shift(2) reads
    the previous scrutin(s) for the same (code_bv, category) group.
    """
    df = local.sort_values("date_scrutin").copy()
    bv_cat = ["code_bv", "category"]
    bv_cat_type = ["code_bv", "category", "election_type"]
    df["share_lag_any"] = df.groupby(bv_cat)["share"].shift(1)
    df["share_lag2_any"] = df.groupby(bv_cat)["share"].shift(2)
    df["share_lag_same_type"] = df.groupby(bv_cat_type)["share"].shift(1)
    df["dev_to_nat"] = df["share"] - df["share_nat"]
    df["dev_to_nat_lag_any"] = df.groupby(bv_cat)["dev_to_nat"].shift(1)
    df["dev_to_nat_lag_same_type"] = df.groupby(bv_cat_type)["dev_to_nat"].shift(1)
    # Swing = movement between the two previous scrutins (not the current one).
    df["swing_any"] = df["share_lag_any"] - df["share_lag2_any"]
    return df
|
| 465 |
+
|
| 466 |
+
|
| 467 |
+
def _pivot_feature(df: pd.DataFrame, value_col: str, prefix: str) -> pd.DataFrame:
    """Pivot one value column into per-category columns named f"{prefix}{cat}"."""
    wide = df.pivot_table(index=INDEX_COLS, columns="category", values=value_col)
    # Keep only the canonical categories, then rename with the given prefix.
    kept = [cat for cat in wide.columns if cat in CANDIDATE_CATEGORIES]
    wide = wide[kept]
    wide.columns = [f"{prefix}{cat}" for cat in kept]
    return wide.reset_index()
|
| 473 |
+
|
| 474 |
+
|
| 475 |
+
def build_panel(
    elections_long_path: Path,
    mapping_path: Path,
    output_path: Path,
    *,
    csv_output: Path | None = None,
) -> pd.DataFrame:
    """Build and persist the per-bureau panel of features and targets.

    Pipeline: load long data -> explode by category -> aggregate per event ->
    join national references -> add lag features -> pivot everything wide ->
    clip and renormalise target shares so each row sums to 1 -> write parquet
    (and optionally CSV).

    Returns:
        The assembled panel DataFrame.
    """
    elections_long = load_elections_long(elections_long_path)
    mapping = load_mapping(mapping_path)
    expanded = expand_by_category(elections_long, mapping)
    local = aggregate_by_event(expanded)

    nat = compute_national_reference(local)
    local = local.merge(nat, on=["election_type", "election_year", "round", "category"], how="left")
    local = add_lags(local)

    # Turnout is identical across categories of one event; max() collapses it.
    turnout_event = (
        local.groupby(INDEX_COLS, as_index=False)["turnout_pct"].max().sort_values("date_scrutin")
    )
    turnout_event["prev_turnout_any_lag1"] = turnout_event.groupby("code_bv")["turnout_pct"].shift(1)
    turnout_event["prev_turnout_same_type_lag1"] = turnout_event.groupby(["code_bv", "election_type"])[
        "turnout_pct"
    ].shift(1)

    datasets: List[pd.DataFrame] = [
        _pivot_feature(local, "share", "target_share_"),
        _pivot_feature(local, "share_lag_any", "prev_share_any_lag1_"),
        _pivot_feature(local, "share_lag_same_type", "prev_share_type_lag1_"),
        _pivot_feature(local, "dev_to_nat_lag_any", "prev_dev_to_national_any_lag1_"),
        _pivot_feature(local, "dev_to_nat_lag_same_type", "prev_dev_to_national_type_lag1_"),
        _pivot_feature(local, "swing_any", "swing_any_"),
    ]
    panel = reduce(lambda left, right: left.merge(right, on=INDEX_COLS, how="left"), datasets)
    panel = panel.merge(
        turnout_event[INDEX_COLS + ["turnout_pct", "prev_turnout_any_lag1", "prev_turnout_same_type_lag1"]],
        on=INDEX_COLS,
        how="left",
    )

    # Targets: one column per category, clipped to [0, 1] and renormalised so
    # each row with any vote mass sums to exactly 1.
    target_cols = [f"target_share_{c}" for c in CANDIDATE_CATEGORIES]
    for col in target_cols:
        if col not in panel.columns:
            panel[col] = 0.0
    panel[target_cols] = panel[target_cols].fillna(0).clip(lower=0, upper=1)
    panel["target_sum_before_renorm"] = panel[target_cols].sum(axis=1)
    has_mass = panel["target_sum_before_renorm"] > 0
    panel.loc[has_mass, target_cols] = panel.loc[has_mass, target_cols].div(
        panel.loc[has_mass, "target_sum_before_renorm"], axis=0
    )
    panel["target_sum_after_renorm"] = panel[target_cols].sum(axis=1)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    panel.to_parquet(output_path, index=False)
    if csv_output:
        panel.to_csv(csv_output, sep=";", index=False)
    LOGGER.info("Panel enregistré dans %s (%s lignes)", output_path, len(panel))
    return panel
|
| 532 |
+
|
| 533 |
+
|
| 534 |
+
def parse_args() -> argparse.Namespace:
    """Parse CLI options for the panel-building entry point."""
    parser = argparse.ArgumentParser(
        description="Construction du dataset panel features+cibles sans fuite temporelle."
    )
    parser.add_argument(
        "--elections-long",
        default=Path("data/interim/elections_long.parquet"),
        type=Path,
        help="Chemin du format long harmonisé.",
    )
    parser.add_argument(
        "--mapping",
        default=Path("config/nuances.yaml"),
        type=Path,
        help="Mapping nuance -> catégorie.",
    )
    parser.add_argument(
        "--output",
        default=Path("data/processed/panel.parquet"),
        type=Path,
        help="Destination du parquet panel.",
    )
    parser.add_argument(
        "--output-csv",
        default=Path("data/processed/panel.csv"),
        type=Path,
        help="Destination CSV optionnelle.",
    )
    return parser.parse_args()
|
| 561 |
+
|
| 562 |
+
|
| 563 |
+
def main() -> None:
    """Configure logging, then run the panel build with CLI-provided paths."""
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    cli = parse_args()
    build_panel(cli.elections_long, cli.mapping, cli.output, csv_output=cli.output_csv)


if __name__ == "__main__":
    main()
|
src/model/predict.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
import logging
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Dict, List
|
| 8 |
+
|
| 9 |
+
import joblib
|
| 10 |
+
import numpy as np
|
| 11 |
+
import pandas as pd
|
| 12 |
+
|
| 13 |
+
from src.constants import CANDIDATE_CATEGORIES
|
| 14 |
+
from src.features.build_features import (
|
| 15 |
+
aggregate_by_event,
|
| 16 |
+
compute_national_reference,
|
| 17 |
+
expand_by_category,
|
| 18 |
+
load_elections_long,
|
| 19 |
+
load_mapping,
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
LOGGER = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def filter_history(df: pd.DataFrame, target_year: int, commune_code: str | None) -> pd.DataFrame:
|
| 26 |
+
df = df[df["annee"] < target_year]
|
| 27 |
+
if commune_code:
|
| 28 |
+
df = df[df["code_commune"] == commune_code]
|
| 29 |
+
return df
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def build_feature_matrix(
    elections_long: pd.DataFrame,
    mapping: pd.DataFrame,
    target_type: str,
    target_year: int,
) -> pd.DataFrame:
    """Assemble one feature row per bureau for predicting *target_type*/*target_year*.

    Features are "last observed" values: latest share and deviation-to-national
    per category (over all election types and over the same type only), the
    swing between the two most recent scrutins, and the latest turnout figures.
    """
    expanded = expand_by_category(elections_long, mapping)
    local = aggregate_by_event(expanded)
    nat = compute_national_reference(local)
    local = local.merge(nat, on=["election_type", "election_year", "round", "category"], how="left")
    local["dev_to_nat"] = local["share"] - local["share_nat"]
    local = local.sort_values("date_scrutin")

    # .last() picks the chronologically latest value because `local` is sorted
    # by date_scrutin before each groupby.
    last_any_share = (
        local.sort_values("date_scrutin").groupby(["code_bv", "category"])["share"].last()
    )
    last_any_dev = (
        local.sort_values("date_scrutin").groupby(["code_bv", "category"])["dev_to_nat"].last()
    )
    last_type_share = (
        local[local["election_type"] == target_type]
        .sort_values("date_scrutin")
        .groupby(["code_bv", "category"])["share"]
        .last()
    )
    last_type_dev = (
        local[local["election_type"] == target_type]
        .sort_values("date_scrutin")
        .groupby(["code_bv", "category"])["dev_to_nat"]
        .last()
    )
    # Swing entre les deux derniers scrutins tous types
    swing_any = (
        local.groupby(["code_bv", "category"])["share"]
        .apply(lambda s: s.iloc[-1] - s.iloc[-2] if len(s) >= 2 else np.nan)
        .rename("swing_any")
    )

    turnout_any = local.groupby("code_bv")["turnout_pct"].last()
    turnout_type = (
        local[local["election_type"] == target_type]
        .sort_values("date_scrutin")
        .groupby("code_bv")["turnout_pct"]
        .last()
    )

    bureaux = sorted(local["code_bv"].dropna().unique())
    records: List[dict] = []
    for code_bv in bureaux:
        record = {
            # NOTE(review): assumes code_bv is "<commune>-<bureau>" — confirm.
            "commune_code": str(code_bv).split("-")[0],
            "code_bv": code_bv,
            "election_type": target_type,
            "election_year": target_year,
            # Placeholder round/date for the future scrutin being predicted.
            "round": 1,
            "date_scrutin": f"{target_year}-01-01",
            "prev_turnout_any_lag1": turnout_any.get(code_bv, np.nan),
            "prev_turnout_same_type_lag1": turnout_type.get(code_bv, np.nan),
        }
        for cat in CANDIDATE_CATEGORIES:
            record[f"prev_share_any_lag1_{cat}"] = last_any_share.get((code_bv, cat), np.nan)
            record[f"prev_share_type_lag1_{cat}"] = last_type_share.get((code_bv, cat), np.nan)
            record[f"prev_dev_to_national_any_lag1_{cat}"] = last_any_dev.get((code_bv, cat), np.nan)
            record[f"prev_dev_to_national_type_lag1_{cat}"] = last_type_dev.get((code_bv, cat), np.nan)
            record[f"swing_any_{cat}"] = swing_any.get((code_bv, cat), np.nan)
        records.append(record)
    return pd.DataFrame.from_records(records)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def compute_references(local: pd.DataFrame, target_year: int) -> Dict[str, Dict[tuple, float]]:
    """Build per-(bureau, category) reference shares for delta reporting.

    Args:
        local: aggregated per-event results with election_type, election_year,
            date_scrutin, code_bv, category and share columns.
        target_year: only legislative results strictly before this year are used.

    Returns:
        A dict with two entries — "leg" (last legislative share before
        *target_year*) and "mun2020" (share of the 2020 municipal election).
        Inner-dict keys are (code_bv, category) tuples.
        (Fixed: the previous annotation claimed Dict[str, float] inner keys,
        but the keys have always been tuples.)
    """
    refs: Dict[str, Dict[tuple, float]] = {}
    # .last() after sorting by date keeps the chronologically latest row.
    leg = (
        local[(local["election_type"] == "legislatives") & (local["election_year"] < target_year)]
        .sort_values("date_scrutin")
        .groupby(["code_bv", "category"])
        .last()
    )
    mun2020 = (
        local[(local["election_type"] == "municipales") & (local["election_year"] == 2020)]
        .sort_values("date_scrutin")
        .groupby(["code_bv", "category"])
        .last()
    )
    refs["leg"] = {(code_bv, cat): row["share"] for (code_bv, cat), row in leg.iterrows()}
    refs["mun2020"] = {(code_bv, cat): row["share"] for (code_bv, cat), row in mun2020.iterrows()}
    return refs
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def load_feature_columns(path: Path, df: pd.DataFrame) -> List[str]:
    """Return the feature column list, read from disk when available.

    When *path* exists it is parsed as a JSON list of column names;
    otherwise every column of *df* that is not an identifier is used.
    """
    if not path.exists():
        # Fallback: keep all columns except the identifier/meta ones.
        identifier_cols = {
            "commune_code",
            "code_bv",
            "election_type",
            "election_year",
            "round",
            "date_scrutin",
        }
        return [name for name in df.columns if name not in identifier_cols]
    return json.loads(path.read_text())
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def predict(
    model_path: Path,
    feature_df: pd.DataFrame,
    feature_cols: List[str],
    refs: Dict[str, Dict[tuple, float]],
) -> pd.DataFrame:
    """Run the trained model on the feature matrix and format the output.

    Args:
        model_path: Path to the joblib-serialized multi-output model.
        feature_df: One row per polling station; must contain
            ``commune_code`` and ``code_bv`` plus (a subset of) the features.
        feature_cols: Feature columns expected by the trained model; any
            column missing from ``feature_df`` is filled with NaN.
        refs: Reference shares from ``compute_references``, keyed by
            ``(code_bv, category)``.

    Returns:
        DataFrame with predicted shares (percent, rounded to 2 decimals)
        per category and deltas against the legislative / 2020 municipal
        references (``"N/A"`` when no reference exists for a pair).
    """
    model = joblib.load(model_path)
    # Work on a copy so the caller's DataFrame is not mutated when missing
    # feature columns are added (the original wrote into feature_df in place).
    work = feature_df.copy()
    for col in feature_cols:
        if col not in work.columns:
            work[col] = np.nan

    preds = model.predict(work[feature_cols])
    preds = np.clip(preds, 0, 1)
    sums = preds.sum(axis=1, keepdims=True)
    sums[sums == 0] = 1  # avoid division by zero on all-zero rows
    preds = preds / sums
    preds_pct = preds * 100

    rows = []
    # Enumerate positions explicitly: the DataFrame index is not guaranteed
    # to be a RangeIndex, so using the label from iterrows() to index the
    # prediction array (as the original did) silently misaligns rows after
    # any upstream filtering.
    for pos, (_, row) in enumerate(work.iterrows()):
        code_bv = row["code_bv"]
        record = {
            "commune_code": row["commune_code"],
            "code_bv": code_bv,
        }
        for cat_idx, cat in enumerate(CANDIDATE_CATEGORIES):
            pred_val = preds_pct[pos, cat_idx]
            record[f"predicted_share_{cat}"] = round(float(pred_val), 2)
            leg_ref = refs["leg"].get((code_bv, cat))
            mun_ref = refs["mun2020"].get((code_bv, cat))
            # References are stored as fractions (0-1); scale to percent.
            record[f"delta_leg_{cat}"] = "N/A" if leg_ref is None else round(float(pred_val - leg_ref * 100), 2)
            record[f"delta_mun2020_{cat}"] = "N/A" if mun_ref is None else round(float(pred_val - mun_ref * 100), 2)
        rows.append(record)
    return pd.DataFrame(rows)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def main() -> None:
    """CLI entry point: build lag/swing features for the target election and
    write per-bureau predictions to CSV."""
    parser = argparse.ArgumentParser(description="Prédictions bureau par bureau pour une échéance cible.")
    parser.add_argument("--model-path", type=Path, default=Path("models/hist_gradient_boosting.joblib"), help="Modèle entraîné.")
    parser.add_argument("--feature-columns", type=Path, default=Path("models/feature_columns.json"), help="Colonnes de features attendues.")
    parser.add_argument("--elections-long", type=Path, default=Path("data/interim/elections_long.parquet"), help="Historique long.")
    parser.add_argument("--mapping", type=Path, default=Path("config/nuances.yaml"), help="Mapping nuances->catégories.")
    parser.add_argument("--target-election-type", type=str, default="municipales", help="Type d'élection cible.")
    parser.add_argument("--target-year", type=int, default=2026, help="Année cible.")
    parser.add_argument("--commune-code", type=str, default="34301", help="Code commune à filtrer (Sete=34301).")
    parser.add_argument("--output-dir", type=Path, default=Path("predictions"), help="Répertoire de sortie.")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

    # Restrict the long history to pre-target years and the requested commune.
    elections_long = load_elections_long(args.elections_long)
    elections_long = filter_history(elections_long, args.target_year, args.commune_code)
    mapping = load_mapping(args.mapping)

    feature_df = build_feature_matrix(elections_long, mapping, args.target_election_type, args.target_year)
    if feature_df.empty:
        raise RuntimeError("Aucune donnée historique disponible pour construire les features.")
    feature_cols = load_feature_columns(args.feature_columns, feature_df)
    # References used for the delta columns (latest legislative + 2020 municipal).
    # NOTE(review): the .assign() below rewrites election_type with itself and
    # looks like a no-op — confirm intent before simplifying.
    refs = compute_references(
        aggregate_by_event(expand_by_category(elections_long, mapping)).assign(
            election_type=lambda d: d["election_type"]
        ),
        args.target_year,
    )
    preds_df = predict(args.model_path, feature_df, feature_cols, refs)

    args.output_dir.mkdir(parents=True, exist_ok=True)
    output_path = args.output_dir / f"pred_{args.target_election_type}_{args.target_year}_sete.csv"
    preds_df.to_csv(output_path, index=False)
    LOGGER.info("Prédictions écrites dans %s", output_path)
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
if __name__ == "__main__":  # script entry point
    main()
|
src/model/train.py
ADDED
|
@@ -0,0 +1,666 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
import logging
|
| 6 |
+
import sys
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Dict, List, Optional, Tuple
|
| 10 |
+
|
| 11 |
+
import joblib
|
| 12 |
+
import numpy as np
|
| 13 |
+
import pandas as pd
|
| 14 |
+
from sklearn.compose import ColumnTransformer
|
| 15 |
+
from sklearn.base import BaseEstimator, RegressorMixin, clone
|
| 16 |
+
from sklearn.ensemble import HistGradientBoostingClassifier, HistGradientBoostingRegressor
|
| 17 |
+
from sklearn.impute import SimpleImputer
|
| 18 |
+
from sklearn.linear_model import Ridge
|
| 19 |
+
from sklearn.metrics import (
|
| 20 |
+
explained_variance_score,
|
| 21 |
+
mean_absolute_error,
|
| 22 |
+
mean_squared_error,
|
| 23 |
+
median_absolute_error,
|
| 24 |
+
r2_score,
|
| 25 |
+
)
|
| 26 |
+
from sklearn.model_selection import TimeSeriesSplit
|
| 27 |
+
from sklearn.multioutput import MultiOutputRegressor
|
| 28 |
+
from sklearn.pipeline import Pipeline
|
| 29 |
+
from sklearn.preprocessing import StandardScaler
|
| 30 |
+
from sklearn.utils.validation import check_is_fitted
|
| 31 |
+
|
| 32 |
+
# Ensure project root is on sys.path when running as a script
|
| 33 |
+
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
| 34 |
+
if str(PROJECT_ROOT) not in sys.path:
|
| 35 |
+
sys.path.append(str(PROJECT_ROOT))
|
| 36 |
+
|
| 37 |
+
from src.constants import CANDIDATE_CATEGORIES
|
| 38 |
+
|
| 39 |
+
# Module-level logger; configured by the CLI entry point in main().
LOGGER = logging.getLogger(__name__)

# One regression target column per candidate block/category.
TARGET_COLS = [f"target_share_{c}" for c in CANDIDATE_CATEGORIES]
# Identifier / bookkeeping columns excluded from the feature set.
META_COLS = [
    "commune_code",
    "code_bv",
    "election_type",
    "election_year",
    "round",
    "date_scrutin",
    "target_sum_before_renorm",
    "target_sum_after_renorm",
]

# Hand-written hyper-parameter grids per model family; each list entry is one
# candidate configuration evaluated by the event-wise temporal CV.
MODEL_GRIDS: Dict[str, List[Dict[str, object]]] = {
    "ridge": [
        {"alpha": 0.1},
        {"alpha": 1.0},
        {"alpha": 10.0},
        {"alpha": 50.0},
    ],
    "hist_gradient_boosting": [
        {"max_depth": 3, "learning_rate": 0.08, "max_iter": 400, "min_samples_leaf": 30, "l2_regularization": 0.1},
        {"max_depth": 4, "learning_rate": 0.05, "max_iter": 600, "min_samples_leaf": 20, "l2_regularization": 0.1},
        {"max_depth": 4, "learning_rate": 0.1, "max_iter": 300, "min_samples_leaf": 50, "l2_regularization": 1.0},
        {"max_depth": 6, "learning_rate": 0.05, "max_iter": 500, "min_samples_leaf": 40, "l2_regularization": 0.5},
        {"max_depth": 3, "learning_rate": 0.05, "max_iter": 500, "min_samples_leaf": 80, "l2_regularization": 1.0},
        {"max_depth": 3, "learning_rate": 0.04, "max_iter": 600, "min_samples_leaf": 120, "l2_regularization": 2.0},
        {"max_depth": 2, "learning_rate": 0.08, "max_iter": 500, "min_samples_leaf": 150, "l2_regularization": 3.0},
    ],
    "lightgbm": [
        {"n_estimators": 600, "learning_rate": 0.05, "num_leaves": 31, "subsample": 0.8, "colsample_bytree": 0.8},
        {"n_estimators": 400, "learning_rate": 0.08, "num_leaves": 16, "min_child_samples": 30, "subsample": 0.7, "colsample_bytree": 0.7},
    ],
    "xgboost": [
        {"n_estimators": 600, "learning_rate": 0.05, "max_depth": 6, "subsample": 0.8, "colsample_bytree": 0.8},
        {"n_estimators": 400, "learning_rate": 0.08, "max_depth": 4, "subsample": 0.7, "colsample_bytree": 0.7},
    ],
    # Hurdle model: separate grids for the classifier and regressor stages
    # (consumed by make_model / TwoStageRegressor).
    "two_stage_hgb": [
        {
            "clf_params": {"max_depth": 3, "learning_rate": 0.08, "max_iter": 300, "min_samples_leaf": 30, "l2_regularization": 0.1},
            "reg_params": {"max_depth": 3, "learning_rate": 0.08, "max_iter": 400, "min_samples_leaf": 30, "l2_regularization": 0.1},
            "epsilon": 1e-4,
            "use_logit": True,
            "use_proba": True,
        },
        {
            "clf_params": {"max_depth": 2, "learning_rate": 0.1, "max_iter": 300, "min_samples_leaf": 60, "l2_regularization": 0.2},
            "reg_params": {"max_depth": 2, "learning_rate": 0.08, "max_iter": 500, "min_samples_leaf": 60, "l2_regularization": 0.5},
            "epsilon": 1e-4,
            "use_logit": True,
            "use_proba": True,
        },
    ],
    "catboost": [
        {"depth": 6, "learning_rate": 0.05, "iterations": 500},
        {"depth": 4, "learning_rate": 0.08, "iterations": 400},
    ],
}
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
@dataclass
class SplitConfig:
    """Year boundaries for the temporal train/valid/test split."""

    # Last election year included in the training set.
    train_end_year: int
    # Last year in the validation set (train_end_year+1 .. valid_end_year).
    valid_end_year: int
    # First year of the test set (inclusive).
    test_start_year: int
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def load_panel(path: Path) -> pd.DataFrame:
    """Load the modelling panel from parquet or semicolon-separated CSV.

    ``election_year`` and ``round`` are coerced to numeric (invalid values
    become NaN).

    Raises:
        FileNotFoundError: if *path* does not exist.
    """
    if not path.exists():
        raise FileNotFoundError(f"Panel introuvable : {path}")
    if path.suffix == ".parquet":
        panel = pd.read_parquet(path)
    else:
        panel = pd.read_csv(path, sep=";")
    for column in ("election_year", "round"):
        panel[column] = pd.to_numeric(panel[column], errors="coerce")
    return panel
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def get_feature_columns(df: pd.DataFrame) -> List[str]:
    """Return every numeric column of *df* that is neither a target nor metadata."""
    blocked = set(TARGET_COLS + META_COLS)
    return [
        col
        for col in df.columns
        if col not in blocked and pd.api.types.is_numeric_dtype(df[col])
    ]
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def temporal_split(df: pd.DataFrame, cfg: SplitConfig) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Split rows by election year into (train, valid, test) frames.

    Boundaries are taken verbatim from *cfg*; valid and test can overlap
    when ``test_start_year`` is not greater than ``valid_end_year``.
    """
    years = df["election_year"]
    train_mask = years <= cfg.train_end_year
    valid_mask = (years > cfg.train_end_year) & (years <= cfg.valid_end_year)
    test_mask = years >= cfg.test_start_year
    return df[train_mask], df[valid_mask], df[test_mask]
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def make_preprocessor(feature_cols: List[str]) -> ColumnTransformer:
    """Median-impute then standard-scale the listed numeric features; drop the rest."""
    numeric_pipeline = Pipeline(
        [
            ("imputer", SimpleImputer(strategy="median")),
            ("scaler", StandardScaler()),
        ]
    )
    return ColumnTransformer(
        transformers=[("num", numeric_pipeline, feature_cols)],
        remainder="drop",
    )
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def normalize_predictions(y_pred: np.ndarray) -> np.ndarray:
    """Clip predictions to [0, 1] and renormalize each row to sum to 1.

    Rows that are entirely zero after clipping stay zero (their divisor is
    forced to 1 to avoid division by zero).
    """
    clipped = np.clip(y_pred, 0, 1)
    row_totals = clipped.sum(axis=1, keepdims=True)
    safe_totals = np.where(row_totals == 0, 1, row_totals)
    return clipped / safe_totals
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def regression_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> Dict[str, float]:
    """Compute share-level regression metrics after renormalizing predictions.

    *y_true* and *y_pred* are (n_samples, n_categories) share matrices.
    Predictions are clipped/renormalized row-wise before scoring.  Global
    metrics are computed on the flattened matrices; a per-category MAE and a
    winner accuracy (predicted leading category vs. actual) are also returned.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    y_pred = normalize_predictions(y_pred)
    y_true_flat = y_true.reshape(-1)
    y_pred_flat = y_pred.reshape(-1)
    mae = float(mean_absolute_error(y_true_flat, y_pred_flat))
    rmse = float(np.sqrt(mean_squared_error(y_true_flat, y_pred_flat)))
    medae = float(median_absolute_error(y_true_flat, y_pred_flat))
    # r2 / explained variance are undefined for a single observation.
    r2 = float(r2_score(y_true_flat, y_pred_flat)) if len(y_true_flat) > 1 else np.nan
    evs = float(explained_variance_score(y_true_flat, y_pred_flat)) if len(y_true_flat) > 1 else np.nan
    denom = float(np.sum(np.abs(y_true_flat)))
    # WAPE is undefined when all true shares are zero.
    wape = float(np.sum(np.abs(y_true_flat - y_pred_flat)) / denom) if denom > 0 else np.nan
    # SMAPE with a small epsilon so jointly-zero entries do not divide by zero.
    smape = float(np.mean(2 * np.abs(y_pred_flat - y_true_flat) / (np.abs(y_true_flat) + np.abs(y_pred_flat) + 1e-9)))
    # Positive bias means the model over-predicts on average.
    bias = float(np.mean(y_pred_flat - y_true_flat))
    winner_true = np.argmax(y_true, axis=1)
    winner_pred = np.argmax(y_pred, axis=1)
    winner_acc = float(np.mean(winner_true == winner_pred)) if len(winner_true) else np.nan
    metrics = {
        "mae_mean": mae,
        "rmse": rmse,
        "medae": medae,
        "r2": r2,
        "explained_var": evs,
        "wape": wape,
        "smape": smape,
        "bias": bias,
        "winner_accuracy": winner_acc,
    }
    # Per-category MAE (columns follow the CANDIDATE_CATEGORIES order).
    for idx, cat in enumerate(CANDIDATE_CATEGORIES):
        metrics[f"mae_{cat}"] = float(mean_absolute_error(y_true[:, idx], y_pred[:, idx]))
    return metrics
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def build_event_folds(df: pd.DataFrame, n_splits: int) -> List[Tuple[np.ndarray, np.ndarray]]:
    """Build time-ordered CV folds whose units are whole election events.

    An "event" is one (election_type, year, round) triple; TimeSeriesSplit is
    applied over the chronologically ordered events so rows of one scrutin
    never straddle a train/test boundary.

    Returns:
        List of ``(train_idx, test_idx)`` pairs of *positional* indices into
        ``df`` (suitable for ``df.iloc``), or ``[]`` when fewer than two
        dated events are available.
    """
    if df.empty:
        return []
    # Reset to a RangeIndex so the returned indices are positional.  The
    # previous implementation returned label indices (``work.index[...]``)
    # that callers consume with ``.iloc`` — selecting the wrong rows whenever
    # ``df`` is a filtered frame with a non-default index, which is exactly
    # what temporal_split produces.
    work = df.reset_index(drop=True)
    work["date_scrutin"] = pd.to_datetime(work.get("date_scrutin"), errors="coerce")  # type: ignore
    if work["date_scrutin"].isna().all():
        # Fall back to the election year when no usable dates exist.
        work["date_scrutin"] = pd.to_datetime(work["election_year"], format="%Y", errors="coerce")
    work["event_key"] = (
        work["election_type"].astype(str).str.lower().str.strip()
        + "|"
        + work["election_year"].astype(str)
        + "|"
        + work["round"].astype(str)
    )
    events = (
        work[["event_key", "date_scrutin"]]
        .dropna(subset=["event_key", "date_scrutin"])
        .drop_duplicates()
        .sort_values("date_scrutin")
        .reset_index(drop=True)
    )
    if len(events) < 2:
        return []
    max_splits = min(n_splits, len(events) - 1)
    tscv = TimeSeriesSplit(n_splits=max_splits)
    folds = []
    for train_evt_idx, test_evt_idx in tscv.split(events):
        train_keys = set(events.iloc[train_evt_idx]["event_key"])
        test_keys = set(events.iloc[test_evt_idx]["event_key"])
        # flatnonzero over the boolean membership mask yields positions
        # aligned with df's row order (work preserves that order).
        train_idx = np.flatnonzero(work["event_key"].isin(train_keys).to_numpy())
        test_idx = np.flatnonzero(work["event_key"].isin(test_keys).to_numpy())
        folds.append((train_idx, test_idx))
    return folds
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
class TwoStageRegressor(BaseEstimator, RegressorMixin):
    """Hurdle-style regressor: P(share > 0) classifier x positive-share regressor.

    Stage 1 classifies whether the target exceeds ``epsilon``; stage 2
    regresses the share on the positive subset only (optionally in logit
    space).  At prediction time the two stages are combined either by
    probability weighting (``use_proba=True``) or by thresholding at
    ``positive_threshold``.
    """

    def __init__(
        self,
        classifier: Optional[BaseEstimator] = None,
        regressor: Optional[BaseEstimator] = None,
        epsilon: float = 1e-4,
        positive_threshold: float = 0.5,
        use_proba: bool = True,
        use_logit: bool = True,
        logit_eps: float = 1e-6,
    ) -> None:
        # sklearn convention: store constructor args untouched so that
        # clone()/get_params() round-trip cleanly; no validation here.
        self.classifier = classifier
        self.regressor = regressor
        self.epsilon = epsilon
        self.positive_threshold = positive_threshold
        self.use_proba = use_proba
        self.use_logit = use_logit
        self.logit_eps = logit_eps

    def _default_classifier(self) -> BaseEstimator:
        # Used when no classifier is supplied at construction time.
        return HistGradientBoostingClassifier(random_state=42)

    def _default_regressor(self) -> BaseEstimator:
        # Used when no regressor is supplied at construction time.
        return HistGradientBoostingRegressor(random_state=42)

    def fit(self, X, y):
        """Fit both stages on a single-output target ``y`` (shares in [0, 1])."""
        y = np.asarray(y).ravel()
        mask_pos = y > self.epsilon

        # Degenerate case: target all-positive or all-zero — no classifier
        # can be trained, so remember a constant probability instead.
        self._constant_proba = None
        if mask_pos.all() or (~mask_pos).all():
            self._constant_proba = float(mask_pos.mean())
            self.classifier_ = None
        else:
            classifier = self.classifier if self.classifier is not None else self._default_classifier()
            self.classifier_ = clone(classifier)
            self.classifier_.fit(X, mask_pos.astype(int))

        self.regressor_ = None
        if mask_pos.any():
            regressor = self.regressor if self.regressor is not None else self._default_regressor()
            self.regressor_ = clone(regressor)
            y_reg = y[mask_pos]
            if self.use_logit:
                # Train in logit space; clip first so log() stays finite.
                y_reg = np.clip(y_reg, self.logit_eps, 1 - self.logit_eps)
                y_reg = np.log(y_reg / (1 - y_reg))
            self.regressor_.fit(X[mask_pos], y_reg)
        return self

    def predict(self, X):
        """Predict shares by combining stage-1 probability with stage-2 magnitude."""
        # NOTE(review): calling predict before fit raises AttributeError on
        # _constant_proba rather than NotFittedError — confirm acceptable.
        if self._constant_proba is not None:
            proba = np.full(len(X), self._constant_proba, dtype=float)
        else:
            check_is_fitted(self, ["classifier_"])
            if self.use_proba and hasattr(self.classifier_, "predict_proba"):
                proba = self.classifier_.predict_proba(X)[:, 1]  # type: ignore
            else:
                # Hard 0/1 labels when probabilities are unavailable/unwanted.
                proba = self.classifier_.predict(X)  # type: ignore
            proba = np.asarray(proba, dtype=float)

        if self.regressor_ is None:
            # No positive samples were seen at fit time.
            reg_pred = np.zeros(len(proba), dtype=float)
        else:
            reg_pred = np.asarray(self.regressor_.predict(X), dtype=float)
            if self.use_logit:
                # Map back from logit space to a share via the sigmoid.
                reg_pred = 1 / (1 + np.exp(-reg_pred))
            reg_pred = np.clip(reg_pred, 0, 1)

        if self.use_proba:
            preds = proba * reg_pred
        else:
            preds = np.where(proba >= self.positive_threshold, reg_pred, 0.0)
        return preds
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
class CatBoostRegressorWrapper(BaseEstimator, RegressorMixin):
    """Thin sklearn-compatible shim around CatBoostRegressor.

    Used by make_model for CatBoost releases that lack ``__sklearn_tags__``
    and therefore break inside clone()/MultiOutputRegressor.  The catboost
    import is deferred to fit() so the module loads without the optional
    dependency installed.
    """

    def __init__(self, **params: float | int | str):
        # Stored raw and re-passed verbatim to CatBoostRegressor at fit time.
        self.params = dict(params)
        self.model_ = None

    def fit(self, X, y, **fit_params):
        # Deferred import: catboost is an optional dependency.
        from catboost import CatBoostRegressor

        self.model_ = CatBoostRegressor(**self.params)  # type: ignore
        self.model_.fit(X, y, **fit_params)
        return self

    def predict(self, X):
        """Predict with the fitted CatBoost model; fails if fit() was never called."""
        if self.model_ is None:
            raise ValueError("CatBoostRegressorWrapper n'est pas entraîné.")
        return self.model_.predict(X)

    def get_params(self, deep: bool = True):
        # NOTE(review): `deep` is ignored — nested estimator params are not
        # expanded; sufficient for clone() on this flat-param wrapper.
        return dict(self.params)

    def set_params(self, **params):
        self.params.update(params)
        return self
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
def make_model(model_name: str, feature_cols: List[str], params: Dict[str, object]) -> Optional[Pipeline]:
    """Build a preprocess + multi-output pipeline for the requested model family.

    Returns None (after logging) when the optional dependency backing the
    family (lightgbm / xgboost / catboost) is not installed.

    Raises:
        ValueError: for an unknown ``model_name``.
    """
    preprocessor = make_preprocessor(feature_cols)
    if model_name == "ridge":
        estimator = Ridge(**params)  # type: ignore
    elif model_name == "hist_gradient_boosting":
        estimator = HistGradientBoostingRegressor(random_state=42, **params)  # type: ignore
    elif model_name == "lightgbm":
        try:
            from lightgbm import LGBMRegressor
        except Exception:
            LOGGER.info("LightGBM indisponible, ignoré.")
            return None
        estimator = LGBMRegressor(random_state=42, force_row_wise=True, verbosity=-1, **params)  # type: ignore
    elif model_name == "xgboost":
        try:
            from xgboost import XGBRegressor
        except Exception:
            LOGGER.info("XGBoost indisponible, ignoré.")
            return None
        estimator = XGBRegressor(random_state=42, **params)
    elif model_name == "two_stage_hgb":
        # Hurdle model: params bundles separate grids for the classifier and
        # regressor stages (see MODEL_GRIDS["two_stage_hgb"]).
        clf_params = params.get("clf_params", {})
        reg_params = params.get("reg_params", {})
        estimator = TwoStageRegressor(
            classifier=HistGradientBoostingClassifier(random_state=42, **clf_params),  # type: ignore
            regressor=HistGradientBoostingRegressor(random_state=42, **reg_params),  # type: ignore
            epsilon=params.get("epsilon", 1e-4),  # type: ignore
            positive_threshold=params.get("positive_threshold", 0.5),  # type: ignore
            use_proba=bool(params.get("use_proba", True)),
            use_logit=bool(params.get("use_logit", True)),
            logit_eps=params.get("logit_eps", 1e-6),  # type: ignore
        )
    elif model_name == "catboost":
        try:
            from catboost import CatBoostRegressor
        except Exception:
            LOGGER.info("CatBoost indisponible, ignoré.")
            return None
        # Older catboost releases lack __sklearn_tags__ and break inside
        # clone()/MultiOutputRegressor; wrap them in the local shim.
        if not hasattr(CatBoostRegressor, "__sklearn_tags__"):
            estimator = CatBoostRegressorWrapper(verbose=0, random_state=42, **params)  # type: ignore
        else:
            estimator = CatBoostRegressor(verbose=0, random_state=42, **params)  # type: ignore
    else:
        raise ValueError(f"Modèle inconnu: {model_name}")
    # n_jobs=1 to avoid process-based parallelism issues in some environments.
    model = MultiOutputRegressor(estimator, n_jobs=1)  # type: ignore
    return Pipeline(
        steps=[
            ("preprocess", preprocessor),
            ("model", model),
        ]
    )
|
| 370 |
+
|
| 371 |
+
|
| 372 |
+
def evaluate(model: Pipeline, X, y_true: np.ndarray) -> Dict[str, float]:
    """Score a fitted *model* on (X, y_true); NaN MAE when there is nothing to score."""
    if X is not None and len(X) > 0:
        return regression_metrics(y_true, model.predict(X))  # type: ignore
    return {"mae_mean": np.nan}
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
def evaluate_cv(
    model: Pipeline,
    df: pd.DataFrame,
    feature_cols: List[str],
    n_splits: int,
    target_cols: List[str],
) -> Dict[str, float]:
    """Event-wise temporal cross-validation.

    Refits a clone of *model* on each fold from build_event_folds and
    averages the fold metrics.  Returns ``cv_``-prefixed means plus the
    number of folds actually used (0 when no folds could be built).
    """
    folds = build_event_folds(df, n_splits)
    if not folds:
        return {"folds_used": 0}
    metrics_acc: Dict[str, list[float]] = {}
    for train_idx, test_idx in folds:
        # Fresh unfitted copy per fold so folds do not leak into each other.
        model_clone = clone(model)
        # NOTE(review): .iloc assumes build_event_folds yields positional
        # indices aligned with df's row order — confirm for filtered frames.
        X_train = df.iloc[train_idx][feature_cols]
        y_train = df.iloc[train_idx][target_cols].values
        X_test = df.iloc[test_idx][feature_cols]
        y_test = df.iloc[test_idx][target_cols].values
        model_clone.fit(X_train, y_train)
        fold_metrics = evaluate(model_clone, X_test, y_test)
        for key, value in fold_metrics.items():
            metrics_acc.setdefault(key, []).append(value)
    # NaN-safe mean over folds; cv_ prefix distinguishes from holdout metrics.
    summary = {f"cv_{k}": float(np.nanmean(v)) for k, v in metrics_acc.items()}
    summary["folds_used"] = len(folds)
    return summary
|
| 403 |
+
|
| 404 |
+
|
| 405 |
+
def compute_cv_residual_intervals(
    model: Pipeline,
    df: pd.DataFrame,
    feature_cols: List[str],
    target_cols: List[str],
    n_splits: int,
    quantiles: Tuple[float, ...] = (0.05, 0.1, 0.9, 0.95),
) -> Dict[str, object]:
    """Estimate per-category residual quantiles via event-wise temporal CV.

    Residuals are (prediction - truth) on renormalized predictions, pooled
    over all folds; the resulting quantiles can back empirical prediction
    intervals for each category.
    """
    folds = build_event_folds(df, n_splits)
    if not folds:
        return {"folds_used": 0, "quantiles": list(quantiles), "residuals": {}}

    residuals_by_cat: Dict[str, list[float]] = {cat: [] for cat in CANDIDATE_CATEGORIES}
    for train_idx, test_idx in folds:
        model_clone = clone(model)
        # NOTE(review): .iloc assumes positional fold indices — see evaluate_cv.
        X_train = df.iloc[train_idx][feature_cols]
        y_train = df.iloc[train_idx][target_cols].values
        X_test = df.iloc[test_idx][feature_cols]
        y_test = df.iloc[test_idx][target_cols].values
        model_clone.fit(X_train, y_train)
        y_pred = model_clone.predict(X_test)
        y_pred = normalize_predictions(y_pred)
        # Positive residual == over-prediction for that station/category.
        resid = y_pred - y_test
        for idx, cat in enumerate(CANDIDATE_CATEGORIES):
            residuals_by_cat[cat].extend(resid[:, idx].tolist())

    # e.g. quantile 0.05 -> key "q05".
    quantile_keys = [f"q{int(q * 100):02d}" for q in quantiles]
    summary: Dict[str, Dict[str, float]] = {}
    for cat, values in residuals_by_cat.items():
        arr = np.asarray(values, dtype=float)
        if arr.size == 0:
            continue
        q_vals = np.quantile(arr, quantiles).tolist()
        entry = {key: float(val) for key, val in zip(quantile_keys, q_vals)}
        entry["mean"] = float(np.mean(arr))
        entry["std"] = float(np.std(arr))
        entry["n"] = int(arr.size)
        summary[cat] = entry

    return {
        "folds_used": len(folds),
        "quantiles": list(quantiles),
        "residuals": summary,
    }
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
def add_cv_selection_helpers(cv_summary: pd.DataFrame) -> pd.DataFrame:
    """Append model-selection helper columns (worst per-block MAE, |bias|).

    Operates on a copy; the input frame is left untouched.
    """
    enriched = cv_summary.copy()
    per_block = [
        name
        for name in enriched.columns
        if name.startswith("cv_mae_") and name != "cv_mae_mean"
    ]
    if per_block:
        enriched["worst_block_mae"] = enriched[per_block].max(axis=1)
    if "cv_bias" in enriched.columns:
        enriched["bias_abs"] = enriched["cv_bias"].abs()
    return enriched
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
def select_best_model(cv_summary: pd.DataFrame) -> Tuple[str, Dict[str, object]]:
    """Pick the best (model, params) row from the CV summary.

    Models whose absolute CV bias stays within 0.02 are preferred when any
    exist; ranking then goes by mean MAE, worst per-block MAE, |bias|, RMSE
    and SMAPE (only the columns actually present are used).

    Raises:
        RuntimeError: when the summary is empty.
    """
    if cv_summary.empty:
        raise RuntimeError("Aucun modèle évalué.")
    enriched = add_cv_selection_helpers(cv_summary)
    candidates = enriched
    if "bias_abs" in enriched.columns:
        low_bias = enriched[enriched["bias_abs"] <= 0.02]
        if not low_bias.empty:
            candidates = low_bias
    ranking_cols = [
        col
        for col in ("cv_mae_mean", "worst_block_mae", "bias_abs", "cv_rmse", "cv_smape")
        if col in candidates.columns
    ]
    winner = candidates.sort_values(ranking_cols, na_position="last").iloc[0]
    return str(winner["model"]), dict(winner["params"])
|
| 474 |
+
|
| 475 |
+
|
| 476 |
+
def save_metrics(
    metrics: Dict[str, Dict[str, Dict[str, float]]],
    output_dir: Path,
    cv_summary: pd.DataFrame | None = None,
) -> None:
    """Persist evaluation metrics as JSON and markdown (plus the CV table as CSV).

    *metrics* is nested as model -> split -> metric name -> value; missing
    metric keys render as NaN in the markdown summary.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    json_path = output_dir / "metrics.json"
    with json_path.open("w", encoding="utf-8") as handle:
        json.dump(metrics, handle, indent=2)

    if cv_summary is not None and not cv_summary.empty:
        cv_summary.to_csv(output_dir / "cv_summary.csv", index=False)

    nan = float("nan")
    report = ["# Métriques (parts, 0-1)\n"]
    for model_name, splits in metrics.items():
        report.append(f"## {model_name}")
        for split, vals in splits.items():
            report.append(
                f"- {split} mae_mean: {vals.get('mae_mean', nan):.4f}, "
                f"rmse: {vals.get('rmse', nan):.4f}, "
                f"wape: {vals.get('wape', nan):.4f}, "
                f"winner_acc: {vals.get('winner_accuracy', nan):.3f}"
            )
        report.append("")
    (output_dir / "metrics.md").write_text("\n".join(report), encoding="utf-8")
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
def save_model_card(
    model_name: str,
    cfg: SplitConfig,
    feature_cols: List[str],
    metrics: Dict[str, Dict[str, Dict[str, float]]],
    output_dir: Path,
) -> None:
    """Write a short markdown model card summarising the retained model."""
    valid_mae = metrics[model_name]["valid"].get("mae_mean", float("nan"))
    test_mae = metrics[model_name]["test"].get("mae_mean", float("nan"))
    card = "\n".join(
        [
            "# Model card",
            f"- Modèle: {model_name}",
            f"- Split temporel: train<= {cfg.train_end_year}, valid<= {cfg.valid_end_year}, test>= {cfg.test_start_year}",
            f"- Features: {len(feature_cols)} colonnes numériques (lags, écarts national, swing, turnout)",
            "- Cibles: parts par bloc (7 catégories) renormalisées.",
            "- Métriques principales (MAE moyen, jeux valid/test):",
            f" - Valid: {valid_mae:.4f}",
            f" - Test: {test_mae:.4f}",
        ]
    )
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "model_card.md").write_text(card, encoding="utf-8")
|
| 520 |
+
|
| 521 |
+
|
| 522 |
+
def plot_mae_per_category(model_name: str, mae_scores: Dict[str, float], output_dir: Path) -> None:
    """Save a bar chart of per-category MAE to ``output_dir/mae_per_category.png``.

    Skips quietly (with a warning) when matplotlib is unavailable or when
    any per-category score is missing from ``mae_scores``.
    """
    try:
        import matplotlib.pyplot as plt
    except Exception:
        LOGGER.warning("Matplotlib indisponible, skip figure.")
        return
    score_keys = [f"mae_{category}" for category in CANDIDATE_CATEGORIES]
    if any(key not in mae_scores for key in score_keys):
        LOGGER.warning("Scores MAE par categorie indisponibles, skip figure.")
        return
    values = [mae_scores[key] for key in score_keys]
    plt.figure(figsize=(8, 4))
    plt.bar(CANDIDATE_CATEGORIES, values, color="#2c7fb8")
    plt.xticks(rotation=30, ha="right")
    plt.ylabel("MAE (part)")
    plt.title(f"MAE par catégorie - {model_name}")
    output_dir.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(output_dir / "mae_per_category.png")
    plt.close()
|
| 542 |
+
|
| 543 |
+
|
| 544 |
+
def main() -> None:
    """CLI entry point: temporal-split training, CV model selection, evaluation and export.

    Steps: parse args -> load panel -> split by election year -> (optional) grid
    CV -> pick best model -> residual intervals -> evaluate on all splits ->
    refit on train+valid -> persist model + metadata + reports.
    """
    parser = argparse.ArgumentParser(description="Entraînement et évaluation temporelle multi-blocs.")
    parser.add_argument("--panel", type=Path, default=Path("data/processed/panel.parquet"), help="Dataset panel parquet.")
    parser.add_argument("--models-dir", type=Path, default=Path("models"), help="Répertoire de sauvegarde des modèles.")
    parser.add_argument("--reports-dir", type=Path, default=Path("reports"), help="Répertoire de sortie des rapports.")
    parser.add_argument("--train-end-year", type=int, default=2019, help="Dernière année incluse dans le train.")
    parser.add_argument("--valid-end-year", type=int, default=2021, help="Dernière année incluse dans la validation.")
    parser.add_argument("--test-start-year", type=int, default=2022, help="Première année du test (inclusif).")
    parser.add_argument("--cv-splits", type=int, default=4, help="Nombre de folds temporels pour la CV par scrutin.")
    parser.add_argument("--no-tune", action="store_true", help="Désactiver la recherche d'hyperparamètres.")
    parser.add_argument("--max-trials", type=int, default=0, help="Limiter le nombre d'essais par modèle (0=all).")
    parser.add_argument(
        "--models",
        nargs="+",
        default=list(MODEL_GRIDS.keys()),
        help="Liste des modèles à tester (ridge, hist_gradient_boosting, lightgbm, xgboost, two_stage_hgb, catboost).",
    )
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    cfg = SplitConfig(train_end_year=args.train_end_year, valid_end_year=args.valid_end_year, test_start_year=args.test_start_year)

    # Load the panel and drop rows where any target share is missing.
    panel = load_panel(args.panel)
    panel = panel.dropna(subset=TARGET_COLS)
    feature_cols = get_feature_columns(panel)
    # Features that are entirely NA carry no signal; drop them with a warning.
    all_na = [c for c in feature_cols if panel[c].isna().all()]
    if all_na:
        LOGGER.warning("Features supprimées car entièrement NA: %s", all_na)
        feature_cols = [c for c in feature_cols if c not in all_na]

    train_df, valid_df, test_df = temporal_split(panel, cfg)
    # train+valid is used both for CV and for the final refit before saving.
    train_valid_df = panel[panel["election_year"] < cfg.test_start_year].copy().reset_index(drop=True)

    models_to_run = [m for m in args.models if m in MODEL_GRIDS]
    if not models_to_run:
        raise RuntimeError("Aucun modèle demandé n'est reconnu.")

    # Hyperparameter search: for each requested model, evaluate (a possibly
    # subsampled) grid with temporal cross-validation on train+valid.
    cv_rows: List[Dict[str, object]] = []
    if not args.no_tune:
        rng = np.random.default_rng(42)  # fixed seed: reproducible trial subsampling
        for model_name in models_to_run:
            grid = MODEL_GRIDS[model_name]
            if args.max_trials and len(grid) > args.max_trials:
                indices = rng.choice(len(grid), size=args.max_trials, replace=False)
                grid = [grid[i] for i in indices]
            for params in grid:
                model = make_model(model_name, feature_cols, params)
                if model is None:
                    # make_model returns None when the backing library is unavailable.
                    continue
                cv_metrics = evaluate_cv(model, train_valid_df, feature_cols, args.cv_splits, TARGET_COLS)
                row = {"model": model_name, "params": params, **cv_metrics}
                cv_rows.append(row)

    cv_summary = pd.DataFrame(cv_rows)
    if not cv_summary.empty:
        cv_summary = cv_summary.dropna(subset=["cv_mae_mean"])
        cv_summary = add_cv_selection_helpers(cv_summary)
    if not cv_summary.empty:
        best_model_name, best_params = select_best_model(cv_summary)
        LOGGER.info("Meilleur modèle CV: %s %s", best_model_name, best_params)
    else:
        # No usable CV rows (e.g. --no-tune): fall back to the first requested
        # model with its first grid entry.
        best_model_name = models_to_run[0]
        best_params = MODEL_GRIDS[best_model_name][0]
        LOGGER.warning("Pas de CV disponible, fallback sur %s %s", best_model_name, best_params)

    # Residual-based prediction intervals for the selected model, written to
    # reports/ only when residuals were actually produced.
    residual_payload = {}
    model_for_intervals = make_model(best_model_name, feature_cols, best_params)
    if model_for_intervals is not None and not train_valid_df.empty:
        residual_payload = compute_cv_residual_intervals(
            model_for_intervals,
            train_valid_df,
            feature_cols,
            TARGET_COLS,
            args.cv_splits,
        )
    if residual_payload.get("residuals"):
        args.reports_dir.mkdir(parents=True, exist_ok=True)
        (args.reports_dir / "residual_intervals.json").write_text(
            json.dumps(
                {
                    "model": best_model_name,
                    **residual_payload,
                },
                indent=2,
            ),
            encoding="utf-8",
        )

    X_train, y_train = train_df[feature_cols], train_df[TARGET_COLS].values
    X_valid, y_valid = valid_df[feature_cols], valid_df[TARGET_COLS].values
    X_test, y_test = test_df[feature_cols], test_df[TARGET_COLS].values
    X_train_valid, y_train_valid = train_valid_df[feature_cols], train_valid_df[TARGET_COLS].values

    # Fit on train only and report metrics on every split for the model card.
    eval_results: Dict[str, Dict[str, Dict[str, float]]] = {}
    best_model_eval = make_model(best_model_name, feature_cols, best_params)
    if best_model_eval is None:
        raise RuntimeError(f"Modèle indisponible: {best_model_name}")
    best_model_eval.fit(X_train, y_train)
    eval_results[best_model_name] = {
        "train": evaluate(best_model_eval, X_train, y_train),
        "valid": evaluate(best_model_eval, X_valid, y_valid),
        "test": evaluate(best_model_eval, X_test, y_test),
        "train_valid": evaluate(best_model_eval, X_train_valid, y_train_valid),
    }

    # Final model is refit on train+valid (fresh instance to avoid leakage
    # from the evaluation fit) before being persisted.
    best_model_final = make_model(best_model_name, feature_cols, best_params)
    if best_model_final is None:
        raise RuntimeError(f"Modèle indisponible: {best_model_name}")
    best_model_final.fit(X_train_valid, y_train_valid)

    args.models_dir.mkdir(parents=True, exist_ok=True)
    joblib.dump(best_model_final, args.models_dir / f"{best_model_name}.joblib")
    LOGGER.info("Modèle sauvegardé dans %s", args.models_dir / f"{best_model_name}.joblib")
    # Persist metadata needed at prediction time: feature list and model name.
    (args.models_dir / "feature_columns.json").write_text(json.dumps(feature_cols, indent=2), encoding="utf-8")
    (args.models_dir / "best_model.json").write_text(json.dumps({"name": best_model_name}, indent=2), encoding="utf-8")

    save_metrics(eval_results, args.reports_dir, cv_summary=cv_summary)
    plot_mae_per_category(best_model_name, eval_results[best_model_name]["test"], args.reports_dir / "figures")
    save_model_card(best_model_name, cfg, feature_cols, eval_results, args.models_dir)
|
| 663 |
+
|
| 664 |
+
|
| 665 |
+
# Script entry point: run the full train/evaluate/save pipeline.
if __name__ == "__main__":
    main()
|
src/pipeline.py
ADDED
|
@@ -0,0 +1,435 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Iterable, Mapping, Optional
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import re
|
| 8 |
+
import yaml
|
| 9 |
+
|
| 10 |
+
from .constants import CANDIDATE_CATEGORIES
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def normalize_bloc(bloc: str | None) -> str:
    """Map an arbitrary bloc label onto the project's canonical categories.

    Labels are lower-cased and snake_cased, known aliases are resolved, and
    anything missing or unrecognised defaults to "centre".
    """
    if bloc is None:
        return "centre"
    key = str(bloc).strip().lower().replace(" ", "_").replace("-", "_")
    aliases = {
        "droite_moderee": "droite_modere",
        "gauche_moderee": "gauche_modere",
        "doite_dure": "droite_dure",  # misspelling observed in source files
        "gauche": "gauche_modere",
        "droite": "droite_modere",
        "divers": "centre",
        "divers_droite": "droite_modere",
        "divers_gauche": "gauche_modere",
        "divers_centre": "centre",
        "extreme_gauche": "extreme_gauche",
        "extreme_droite": "extreme_droite",
    }
    key = aliases.get(key, key)
    return key if key in CANDIDATE_CATEGORIES else "centre"
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
DEFAULT_COMMUNES_PATH = (Path(__file__).resolve().parents[1] / "config" / "communes.yaml")
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _normalize_insee_code(value: str | int | None) -> str:
|
| 43 |
+
if value is None:
|
| 44 |
+
return ""
|
| 45 |
+
cleaned = (
|
| 46 |
+
str(value)
|
| 47 |
+
.strip()
|
| 48 |
+
.replace(".0", "")
|
| 49 |
+
)
|
| 50 |
+
cleaned = re.sub(r"\D", "", cleaned)
|
| 51 |
+
if not cleaned:
|
| 52 |
+
return ""
|
| 53 |
+
if len(cleaned) >= 5:
|
| 54 |
+
return cleaned[:5]
|
| 55 |
+
return cleaned.zfill(5)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def load_target_communes(path: Path = DEFAULT_COMMUNES_PATH) -> dict[str, str]:
    """Read the target-communes YAML and return ``{insee_code: commune_name}``.

    Accepts either a mapping ``{code: name}`` (possibly nested under a
    "communes" key), or a list whose items are bare code strings or dicts
    with code/name keys. Codes are normalised to 5 digits.
    """
    if not path.exists():
        raise FileNotFoundError(f"Fichier communes introuvable: {path}")
    raw = yaml.safe_load(path.read_text()) or {}
    entries = raw.get("communes", raw) if isinstance(raw, dict) else raw
    communes: dict[str, str] = {}

    if isinstance(entries, dict):
        # Mapping form: {code: name}.
        for code, name in entries.items():
            key = _normalize_insee_code(code)
            if key:
                communes[key] = "" if name is None else str(name)
        return communes

    if not isinstance(entries, list):
        raise ValueError("Format YAML invalide: attendu une liste ou un mapping sous 'communes'.")

    for entry in entries:
        if isinstance(entry, str):
            # Bare code string: name unknown.
            key = _normalize_insee_code(entry)
            if key:
                communes[key] = ""
        elif isinstance(entry, dict):
            code = entry.get("code_insee") or entry.get("code") or entry.get("insee")
            name = entry.get("nom") or entry.get("name") or ""
            key = _normalize_insee_code(code)
            if key:
                communes[key] = "" if name is None else str(name)
    return communes
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def load_elections_long(path: Path) -> pd.DataFrame:
    """Load the harmonised long-format dataset (output of notebook 01_pretraitement).

    Parquet files are read directly; anything else is treated as a
    ';'-separated CSV. Count columns are coerced to numeric and missing
    ``voix`` values become 0.
    """
    if path.suffix == ".parquet":
        frame = pd.read_parquet(path)
    else:
        frame = pd.read_csv(path, sep=";")
    frame["date_scrutin"] = pd.to_datetime(frame["date_scrutin"])
    for column in ("exprimes", "inscrits", "votants", "voix", "blancs", "nuls"):
        if column in frame.columns:
            frame[column] = pd.to_numeric(frame[column], errors="coerce")
    frame["voix"] = frame["voix"].fillna(0)
    return frame
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def _mapping_from_yaml(path: Path) -> pd.DataFrame:
    """Build a candidature->bloc mapping DataFrame from a YAML spec.

    The YAML may provide inline "mapping" entries, point to a CSV via
    "base_mapping" (relative paths resolved against the YAML's directory),
    and patch rows through "overrides": existing codes are updated in place,
    unknown codes are appended.
    """
    # Defensive import kept although the module already imports yaml at top
    # level; yields a clearer error if PyYAML is missing.
    try:
        import yaml
    except Exception as exc:
        raise RuntimeError("PyYAML est requis pour charger un mapping YAML.") from exc
    raw = yaml.safe_load(path.read_text()) or {}
    if not isinstance(raw, dict):
        raise ValueError("Mapping YAML invalide: attendu un dictionnaire.")

    base_mapping = raw.get("base_mapping")
    mapping_entries = raw.get("mapping")
    overrides = raw.get("overrides", [])

    # Base mapping: inline entries win over a referenced CSV; otherwise start
    # from an empty frame with the expected schema.
    mapping = pd.DataFrame()
    if mapping_entries:
        mapping = pd.DataFrame(mapping_entries)
    elif base_mapping:
        base_path = Path(base_mapping)
        if not base_path.is_absolute():
            base_path = path.parent / base_path
        mapping = pd.read_csv(base_path, sep=";")
    else:
        mapping = pd.DataFrame(columns=["code_candidature", "nom_candidature", "bloc_1", "bloc_2", "bloc_3"])

    if overrides:
        override_df = pd.DataFrame(overrides)
        if not override_df.empty:
            # A compact "blocs: [a, b, c]" list expands into bloc_1..bloc_3.
            if "blocs" in override_df.columns:
                blocs = override_df["blocs"].apply(lambda v: v if isinstance(v, list) else [])
                override_df["bloc_1"] = blocs.apply(lambda v: v[0] if len(v) > 0 else None)
                override_df["bloc_2"] = blocs.apply(lambda v: v[1] if len(v) > 1 else None)
                override_df["bloc_3"] = blocs.apply(lambda v: v[2] if len(v) > 2 else None)
                override_df = override_df.drop(columns=["blocs"])
            # Accept short aliases "code"/"nom" for the canonical column names.
            if "code_candidature" not in override_df.columns and "code" in override_df.columns:
                override_df = override_df.rename(columns={"code": "code_candidature"})
            if "nom_candidature" not in override_df.columns and "nom" in override_df.columns:
                override_df = override_df.rename(columns={"nom": "nom_candidature"})

            # Compare codes as strings on both sides so YAML ints match CSV strings.
            mapping = mapping.copy()
            if "code_candidature" in mapping.columns:
                mapping["code_candidature"] = mapping["code_candidature"].astype(str)
            if "code_candidature" in override_df.columns:
                override_df["code_candidature"] = override_df["code_candidature"].astype(str)

            for _, row in override_df.iterrows():
                code = row.get("code_candidature")
                if code is None:
                    continue
                if "code_candidature" in mapping.columns:
                    mask = mapping["code_candidature"] == code
                else:
                    mask = pd.Series([False] * len(mapping))
                if mask.any():
                    # Update only the non-NA fields of the override row.
                    for col in ["nom_candidature", "bloc_1", "bloc_2", "bloc_3"]:
                        if col in row and pd.notna(row[col]):
                            mapping.loc[mask, col] = row[col]
                else:
                    # Unknown code: append as a new mapping row.
                    mapping = pd.concat([mapping, pd.DataFrame([row])], ignore_index=True)
    return mapping
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def load_bloc_mapping(path: Path) -> pd.DataFrame:
    """Load a candidature->bloc mapping from a YAML spec or ';'-separated CSV."""
    yaml_suffixes = {".yml", ".yaml"}
    if path.suffix in yaml_suffixes:
        mapping = _mapping_from_yaml(path)
    else:
        mapping = pd.read_csv(path, sep=";")
    # Canonicalise bloc labels once so downstream code never sees raw labels.
    for bloc_column in ("bloc_1", "bloc_2", "bloc_3"):
        if bloc_column in mapping.columns:
            mapping[bloc_column] = mapping[bloc_column].apply(normalize_bloc)
    return mapping
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
def expand_voix_by_bloc(elections_long: pd.DataFrame, mapping: pd.DataFrame) -> pd.DataFrame:
    """Distribute the voix of each candidature evenly across its mapped blocs.

    Each candidature row is joined to its bloc mapping (bloc_1..bloc_3); its
    votes are split equally between the listed blocs, defaulting to "centre"
    when no bloc is mapped. Derived share/turnout columns are appended.

    Fixes vs previous version:
    - ``part_bloc`` now guards against ``exprimes == 0`` with
      ``.replace(0, pd.NA)`` (consistent with ``base_inscrits`` below and
      with ``compute_national_reference``), instead of producing inf.
    - An empty input no longer raises KeyError (``from_records([])`` has no
      columns); a typed empty frame is returned instead.

    NOTE: iterrows over the merged frame is O(n) Python-level and slow on
    large inputs; kept for fidelity — vectorising via melt/explode is a
    possible future optimisation.
    """
    df = elections_long.merge(mapping, on="code_candidature", how="left")
    records: list[dict] = []
    for _, row in df.iterrows():
        blocs = [row.get("bloc_1"), row.get("bloc_2"), row.get("bloc_3")]
        blocs = [b for b in blocs if isinstance(b, str) and b]
        blocs = [normalize_bloc(b) for b in blocs]
        if not blocs:
            # Unmapped candidature: fall back to the neutral bloc.
            blocs = ["centre"]
        voix = row.get("voix", 0) or 0
        repartition = voix / len(blocs)
        for bloc in blocs:
            records.append(
                {
                    "code_bv": row.get("code_bv"),
                    "nom_bv": row.get("nom_bv"),
                    "date_scrutin": row.get("date_scrutin"),
                    "annee": row.get("annee"),
                    "type_scrutin": row.get("type_scrutin"),
                    "tour": row.get("tour"),
                    "bloc": bloc,
                    "voix_bloc": repartition,
                    "exprimes": row.get("exprimes"),
                    "inscrits": row.get("inscrits"),
                    "votants": row.get("votants"),
                    "blancs": row.get("blancs"),
                    "nuls": row.get("nuls"),
                }
            )
    result = pd.DataFrame.from_records(records)
    if result.empty:
        # from_records([]) yields a column-less frame; return the full schema.
        return pd.DataFrame(
            columns=[
                "code_bv", "nom_bv", "date_scrutin", "annee", "type_scrutin", "tour",
                "bloc", "voix_bloc", "exprimes", "inscrits", "votants", "blancs", "nuls",
                "part_bloc", "taux_participation_bv", "taux_blancs_bv", "taux_nuls_bv",
            ]
        )
    result["date_scrutin"] = pd.to_datetime(result["date_scrutin"])
    for col in ["voix_bloc", "exprimes", "inscrits", "votants", "blancs", "nuls"]:
        result[col] = pd.to_numeric(result[col], errors="coerce")
    # Zero-guard both denominators so empty bureaux yield NA instead of inf.
    result["part_bloc"] = result["voix_bloc"] / result["exprimes"].replace(0, pd.NA)
    base_inscrits = result["inscrits"].replace(0, pd.NA)
    result["taux_participation_bv"] = result["votants"] / base_inscrits
    result["taux_blancs_bv"] = result["blancs"] / base_inscrits
    result["taux_nuls_bv"] = result["nuls"] / base_inscrits
    return result
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def compute_national_reference(elections_blocs: pd.DataFrame) -> pd.DataFrame:
    """Aggregate a "national" share/turnout reference per (date, bloc).

    Used as a fallback when no external national results file is provided:
    the reference is simply the total over all bureaux in the dataset.
    """
    totals = elections_blocs.groupby(["date_scrutin", "bloc"], as_index=False)[
        ["voix_bloc", "exprimes", "votants", "inscrits"]
    ].sum()
    totals = totals.rename(
        columns={
            "voix_bloc": "voix_bloc_nat",
            "exprimes": "exprimes_nat",
            "votants": "votants_nat",
            "inscrits": "inscrits_nat",
        }
    )
    # Zero totals become NA so the divisions below yield NA rather than inf.
    safe_exprimes = totals["exprimes_nat"].replace(0, pd.NA)
    safe_inscrits = totals["inscrits_nat"].replace(0, pd.NA)
    totals["part_bloc_national"] = totals["voix_bloc_nat"] / safe_exprimes
    totals["taux_participation_national"] = totals["votants_nat"] / safe_inscrits
    return totals[["date_scrutin", "bloc", "part_bloc_national", "taux_participation_national"]]
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def attach_national_results(
    elections_blocs: pd.DataFrame,
    resultats_nationaux: Optional[pd.DataFrame] = None,
) -> pd.DataFrame:
    """Merge national reference scores and derive bureau-vs-national gaps.

    When no external national table is supplied, the reference is computed
    from the full dataset itself (see ``compute_national_reference``).
    """
    if resultats_nationaux is None:
        national = compute_national_reference(elections_blocs)
    else:
        national = resultats_nationaux.copy()
        national["date_scrutin"] = pd.to_datetime(national["date_scrutin"])

    merged = elections_blocs.merge(national, on=["date_scrutin", "bloc"], how="left")
    merged["ecart_bloc_vs_national"] = merged["part_bloc"] - merged["part_bloc_national"]
    merged["ecart_participation_vs_nat"] = (
        merged["taux_participation_bv"] - merged["taux_participation_national"]
    )
    return merged
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def compute_population_growth(elections_blocs: pd.DataFrame, base_year: int = 2014) -> pd.DataFrame:
    """Attach per-bureau growth of registered voters relative to ``base_year``.

    Registered voters are averaged per (bureau, year); growth is NA for
    bureaux absent from the base year.
    """
    yearly = elections_blocs.groupby(["code_bv", "annee"], as_index=False)["inscrits"].mean()
    baseline = yearly.loc[yearly["annee"] == base_year, ["code_bv", "inscrits"]].rename(
        columns={"inscrits": "inscrits_base"}
    )
    yearly = yearly.merge(baseline, on="code_bv", how="left")
    yearly["croissance_inscrits_depuis_base"] = (
        yearly["inscrits"] - yearly["inscrits_base"]
    ) / yearly["inscrits_base"]

    return elections_blocs.merge(
        yearly[["code_bv", "annee", "croissance_inscrits_depuis_base"]],
        on=["code_bv", "annee"],
        how="left",
    )
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def add_lag_features(elections_blocs: pd.DataFrame) -> pd.DataFrame:
    """Add previous-election lag columns per (bureau, bloc) and a centred year."""
    df = elections_blocs.sort_values(["code_bv", "bloc", "date_scrutin"])
    group_keys = ["code_bv", "bloc"]
    lagged_columns = {
        "part_bloc_lag1": "part_bloc",
        "ecart_bloc_vs_national_lag1": "ecart_bloc_vs_national",
        "taux_participation_bv_lag1": "taux_participation_bv",
    }
    # shift(1) within each (bureau, bloc) = value at the previous scrutin.
    for new_name, source in lagged_columns.items():
        df[new_name] = df.groupby(group_keys)[source].shift(1)
    # Centre the year on its median so models get a zero-centred covariate.
    df["annee_centre"] = df["annee"] - df["annee"].median()
    return df
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
def filter_target_communes(elections_blocs: pd.DataFrame, target_communes: Mapping[str, str]) -> pd.DataFrame:
    """Keep only rows whose (normalised) commune code is a target commune.

    Also (re)writes ``code_commune`` as a 5-digit code and fills
    ``nom_commune`` from the target mapping.
    """
    df = elections_blocs.copy()
    if "code_commune" in df.columns:
        raw_codes = df["code_commune"].astype(str)
    else:
        # Derive the commune code from the bureau id ("<insee>-<bureau>").
        raw_codes = df["code_bv"].astype(str).str.split("-").str[0]
    normalised = raw_codes.str.replace(r"\D", "", regex=True).str.zfill(5).str.slice(0, 5)
    df["code_commune"] = normalised
    df["nom_commune"] = df["code_commune"].map(target_communes)
    return df[df["code_commune"].isin(target_communes.keys())]
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def compute_commune_event_stats(
    elections_long: pd.DataFrame,
    target_communes: Mapping[str, str],
) -> pd.DataFrame:
    """Aggregate turnout/blank/null statistics per commune and election event.

    Rows are restricted to the target communes, deduplicated to one record
    per bureau/event (max over candidature rows, since counts repeat per
    candidature), then summed per commune/event. Rates are relative to
    registered voters (inscrits).
    """
    df = elections_long.copy()
    if "code_commune" in df.columns:
        code_series = df["code_commune"].astype(str)
    else:
        # Derive the commune code from the bureau id ("<insee>-<bureau>").
        code_series = df["code_bv"].astype(str).str.split("-").str[0]
    code_series = code_series.str.replace(r"\D", "", regex=True).str.zfill(5).str.slice(0, 5)
    df["code_commune"] = code_series
    df = df[df["code_commune"].isin(target_communes.keys())]
    df["nom_commune"] = df["code_commune"].map(target_communes)
    if "date_scrutin" in df.columns:
        df["date_scrutin"] = pd.to_datetime(df["date_scrutin"], errors="coerce")
    # Ensure every count column exists and is numeric (missing ones become NA).
    for col in ["exprimes", "inscrits", "votants", "blancs", "nuls"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        else:
            df[col] = pd.NA

    # Step 1: one row per bureau/event. Counts are repeated on every
    # candidature row of a bureau, so "max" recovers the single true value.
    bv_cols = [c for c in ["code_commune", "code_bv", "type_scrutin", "annee", "tour", "date_scrutin"] if c in df.columns]
    bv_event = (
        df.groupby(bv_cols, as_index=False)
        .agg(
            exprimes=("exprimes", "max"),
            inscrits=("inscrits", "max"),
            votants=("votants", "max"),
            blancs=("blancs", "max"),
            nuls=("nuls", "max"),
        )
    )
    # Step 2: sum the per-bureau counts up to the commune/event level.
    commune_cols = [c for c in ["code_commune", "type_scrutin", "annee", "tour", "date_scrutin"] if c in bv_event.columns]
    commune = (
        bv_event.groupby(commune_cols, as_index=False)
        .agg(
            exprimes=("exprimes", "sum"),
            inscrits=("inscrits", "sum"),
            votants=("votants", "sum"),
            blancs=("blancs", "sum"),
            nuls=("nuls", "sum"),
        )
    )
    # Zero-guard the denominator so empty communes yield NA instead of inf.
    base_inscrits = commune["inscrits"].replace(0, pd.NA)
    commune["turnout_pct"] = commune["votants"] / base_inscrits
    commune["blancs_pct"] = commune["blancs"] / base_inscrits
    commune["nuls_pct"] = commune["nuls"] / base_inscrits
    commune["nom_commune"] = commune["code_commune"].map(target_communes)
    return commune
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
def build_elections_blocs(
    elections_long_path: Path,
    mapping_path: Path,
    *,
    national_results_path: Optional[Path] = None,
    base_year: int = 2014,
    target_communes_path: Path = DEFAULT_COMMUNES_PATH,
) -> pd.DataFrame:
    """End-to-end feature build: load, expand by bloc, attach national refs,
    restrict to target communes, then add growth and lag features.
    """
    elections_long = load_elections_long(elections_long_path)
    bloc_mapping = load_bloc_mapping(mapping_path)
    blocs_df = expand_voix_by_bloc(elections_long, bloc_mapping)

    # Optional external national results (parquet or ';'-separated CSV).
    national_df = None
    if national_results_path and national_results_path.exists():
        if national_results_path.suffix == ".parquet":
            national_df = pd.read_parquet(national_results_path)
        else:
            national_df = pd.read_csv(national_results_path, sep=";")
    # Always attach a national reference; computed from the full data when
    # no external file is supplied.
    blocs_df = attach_national_results(blocs_df, national_df)

    # Restrict to the target communes declared in the YAML config.
    target_communes = load_target_communes(target_communes_path)
    blocs_df = filter_target_communes(blocs_df, target_communes)

    blocs_df = compute_population_growth(blocs_df, base_year=base_year)
    blocs_df = add_lag_features(blocs_df)
    return blocs_df
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
def save_processed(df: pd.DataFrame, output_dir: Path) -> None:
    """Persist the enriched bloc-level dataset as parquet and ';'-separated CSV."""
    output_dir.mkdir(parents=True, exist_ok=True)
    df.to_parquet(output_dir / "elections_blocs.parquet", index=False)
    df.to_csv(output_dir / "elections_blocs.csv", sep=";", index=False)
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
def save_commune_event_stats(df: pd.DataFrame, output_dir: Path) -> None:
    """Persist commune-level event statistics as parquet and ';'-separated CSV."""
    output_dir.mkdir(parents=True, exist_ok=True)
    df.to_parquet(output_dir / "commune_event_stats.parquet", index=False)
    df.to_csv(output_dir / "commune_event_stats.csv", sep=";", index=False)
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
def run_full_pipeline(
    elections_long_path: Path = Path("data/interim/elections_long.parquet"),
    mapping_path: Path = Path("config/nuances.yaml"),
    output_dir: Path = Path("data/processed"),
    national_results_path: Optional[Path] = None,
    target_communes_path: Path = DEFAULT_COMMUNES_PATH,
) -> pd.DataFrame:
    """Build and persist the bloc dataset plus commune-level event statistics.

    Returns the enriched bloc-level DataFrame.
    """
    blocs_df = build_elections_blocs(
        elections_long_path=elections_long_path,
        mapping_path=mapping_path,
        national_results_path=national_results_path,
        target_communes_path=target_communes_path,
    )
    save_processed(blocs_df, output_dir)
    # Commune aggregates are computed from the raw long data, not from the
    # bloc expansion (counts would otherwise be duplicated per bloc).
    elections_long = load_elections_long(elections_long_path)
    target_communes = load_target_communes(target_communes_path)
    commune_stats = compute_commune_event_stats(elections_long, target_communes)
    save_commune_event_stats(commune_stats, output_dir)
    return blocs_df
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
# Public API of the pipeline module.
__all__ = [
    "build_elections_blocs",
    "run_full_pipeline",
    "save_processed",
    "normalize_bloc",
    "load_target_communes",
    "compute_commune_event_stats",
    "save_commune_event_stats",
]
|
src/prediction.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Optional
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import warnings
|
| 9 |
+
|
| 10 |
+
from .constants import CANDIDATE_CATEGORIES
|
| 11 |
+
from .pipeline import normalize_bloc
|
| 12 |
+
|
| 13 |
+
# Resolve numpy's RankWarning across numpy versions. NumPy >= 1.25 exposes it
# in numpy.exceptions, and NumPy 2.x exposes it *only* there — the original
# top-level `from numpy import RankWarning` fails on 2.x, which made the code
# fall back to a dummy class and silently stop suppressing the real warning
# raised by np.polyfit. Try the modern location first, then the legacy one.
try:
    from numpy.exceptions import RankWarning as NP_RANK_WARNING  # type: ignore[attr-defined]
except Exception:
    try:
        from numpy import RankWarning as NP_RANK_WARNING  # type: ignore[attr-defined]
    except Exception:
        class NP_RANK_WARNING(UserWarning):
            # Placeholder so `warnings.simplefilter("ignore", NP_RANK_WARNING)`
            # remains a valid (if inert) filter when numpy lacks RankWarning.
            pass
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@dataclass
class PredictionResult:
    """Projected result for a single political bloc."""

    # Bloc identifier (e.g. "centre", "extreme_droite").
    category: str
    # Projected fraction of the expressed votes, in [0, 1].
    predicted_share: float
    # Projected absolute ballot count derived from the share.
    predicted_count: int
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass
class PredictionSummary:
    """Full projection for one ballot: per-bloc results plus turnout totals.

    The integer fields are ``None`` when no projection could be computed
    (empty input history, or no registered-voter count available).
    """

    # Per-bloc projections (order follows DISPLAY_BLOC_ORDER when built
    # by compute_predictions).
    bloc_predictions: list[PredictionResult]
    # Projected number of registered voters.
    inscrits: Optional[int]
    # Projected number of voters who turned out.
    votants: Optional[int]
    # Projected number of blank ballots.
    blancs: Optional[int]
    # Projected number of spoiled ballots.
    nuls: Optional[int]
    # Projected abstentions (inscrits - votants, floored at 0).
    abstention: Optional[int]
    # Projected expressed votes (votants - blancs - nuls, floored at 0).
    exprimes: Optional[int]
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# Canonical left-to-right display order of the political blocs.
DISPLAY_BLOC_ORDER = [
    "extreme_gauche",
    "gauche_dure",
    "gauche_modere",
    "centre",
    "droite_modere",
    "droite_dure",
    "extreme_droite",
]
# Non-bloc categories appended after the blocs in tabular output.
EXTRA_CATEGORIES = ["blancs", "nuls", "abstention"]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def _clip01(value: float) -> float:
|
| 51 |
+
return float(min(1.0, max(0.0, value)))
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _last_share(df: pd.DataFrame, bloc: str, *, election: Optional[str] = None, year: Optional[int] = None) -> Optional[float]:
|
| 55 |
+
subset = df[df["bloc"] == bloc]
|
| 56 |
+
if election:
|
| 57 |
+
subset = subset[subset["type_scrutin"] == election]
|
| 58 |
+
if year is not None:
|
| 59 |
+
subset = subset[subset["annee"] == year]
|
| 60 |
+
if subset.empty:
|
| 61 |
+
return None
|
| 62 |
+
valid = subset.sort_values("date_scrutin")["part_bloc"].dropna()
|
| 63 |
+
if valid.empty:
|
| 64 |
+
return None
|
| 65 |
+
return valid.iloc[-1] # type: ignore[index]
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _last_value(series: pd.Series) -> Optional[float]:
|
| 69 |
+
series = pd.to_numeric(series, errors="coerce").dropna()
|
| 70 |
+
if series.empty:
|
| 71 |
+
return None
|
| 72 |
+
return float(series.iloc[-1])
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _project_share(series: pd.Series, years: pd.Series, target_year: int) -> Optional[float]:
    """Linearly extrapolate a vote share to *target_year*, clipped to [0, 1].

    Falls back to the most recent observation when fewer than two distinct
    years are available or the least-squares fit fails. Returns ``None`` when
    there is no numeric observation at all.
    """
    points = pd.DataFrame(
        {"value": pd.to_numeric(series, errors="coerce"), "year": pd.to_numeric(years, errors="coerce")}
    ).dropna()
    if points.empty:
        return None
    # Default: carry the latest observed share forward.
    projected = points["value"].iloc[-1]
    if len(points) >= 2 and len(points["year"].unique()) >= 2:
        # Guard against poorly conditioned fits on tiny samples.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=NP_RANK_WARNING)
            try:
                slope, intercept = np.polyfit(points["year"], points["value"], 1)
            except Exception:
                pass  # keep the carry-forward fallback
            else:
                projected = slope * target_year + intercept
    return _clip01(float(projected))
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def _project_rate(
    series: pd.Series,
    years: pd.Series,
    target_year: int,
    *,
    min_points_trend: int = 3,
    clamp_to_observed: bool = True,
) -> Optional[float]:
    """Extrapolate a turnout-style rate to *target_year*, clipped to [0, 1].

    A linear trend is fitted only once at least ``min_points_trend``
    observations spanning as many distinct years exist; otherwise the latest
    observation is carried forward. When ``clamp_to_observed`` is true, the
    projection is additionally bounded by the historically observed range.
    Returns ``None`` when no numeric observation exists.
    """
    obs = pd.DataFrame(
        {"value": pd.to_numeric(series, errors="coerce"), "year": pd.to_numeric(years, errors="coerce")}
    ).dropna()
    if obs.empty:
        return None
    vals = obs["value"].to_numpy()
    yrs = obs["year"].to_numpy()
    # Default: carry the latest observed rate forward.
    projected = vals[-1]
    if len(set(yrs)) >= min_points_trend and len(obs) >= min_points_trend:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=NP_RANK_WARNING)
            try:
                slope, intercept = np.polyfit(yrs, vals, 1)
            except Exception:
                pass  # keep the carry-forward fallback
            else:
                projected = slope * target_year + intercept
    if clamp_to_observed and len(vals):
        # Never extrapolate beyond what this precinct has actually exhibited.
        projected = min(max(projected, float(np.nanmin(vals))), float(np.nanmax(vals)))
    return _clip01(float(projected))
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def _allocate_counts(shares: list[float], total: int) -> list[int]:
|
| 125 |
+
if total <= 0 or not shares:
|
| 126 |
+
return [0 for _ in shares]
|
| 127 |
+
arr = np.clip(np.asarray(shares, dtype=float), 0, None)
|
| 128 |
+
if arr.sum() == 0:
|
| 129 |
+
return [0 for _ in shares]
|
| 130 |
+
arr = arr / arr.sum()
|
| 131 |
+
raw = arr * total
|
| 132 |
+
floors = np.floor(raw)
|
| 133 |
+
remainder = int(total - floors.sum())
|
| 134 |
+
if remainder > 0:
|
| 135 |
+
order = np.argsort(-(raw - floors))
|
| 136 |
+
for idx in order[:remainder]:
|
| 137 |
+
floors[idx] += 1
|
| 138 |
+
return floors.astype(int).tolist()
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def compute_predictions(
    history: pd.DataFrame,
    *,
    target_election: str = "municipales",
    target_year: int = 2026,
    inscrits_override: Optional[float] = None,
) -> PredictionSummary:
    """Project per-bloc vote counts and turnout totals for a future election.

    Parameters
    ----------
    history:
        Long-format results per bloc, one row per (polling station, election,
        bloc). Must contain ``bloc``; uses ``voix_bloc``, ``exprimes``,
        ``inscrits``, ``votants``, ``blancs``, ``nuls``, ``type_scrutin``,
        ``tour``, ``annee``, ``date_scrutin``, ``code_bv``, ``part_bloc``
        when present.
    target_election:
        Election type to privilege when selecting the trend series
        (normalized to lowercase).
    target_year:
        Year the shares/rates are extrapolated to.
    inscrits_override:
        Optional registered-voter count to use instead of the last observed
        one; ignored unless it parses to a positive float.

    Returns
    -------
    PredictionSummary
        Per-bloc shares/counts plus projected inscrits/votants/blancs/nuls/
        abstention/exprimes. An empty summary (all ``None``) is returned when
        the history is empty or no registered-voter count can be determined.
    """
    if history.empty:
        return PredictionSummary([], None, None, None, None, None, None)

    df = history.copy()
    target_election = str(target_election).strip().lower()
    df["bloc"] = df["bloc"].apply(normalize_bloc)
    if "type_scrutin" in df.columns:
        df["type_scrutin"] = df["type_scrutin"].astype(str).str.strip().str.lower()
    # Coerce numeric and infer exprimes when missing from the sum of voix_bloc
    for col in ["voix_bloc", "exprimes", "inscrits", "votants", "blancs", "nuls"]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")
    for col in ["inscrits", "votants", "blancs", "nuls"]:
        if col not in df.columns:
            df[col] = np.nan
    if "exprimes" in df.columns:
        # Per (station, date) sum of bloc votes replaces missing/zero exprimes.
        sum_voix = df.groupby(["code_bv", "date_scrutin"])["voix_bloc"].transform("sum")
        df["exprimes"] = df["exprimes"].fillna(sum_voix)
        df.loc[df["exprimes"] == 0, "exprimes"] = sum_voix
    if "part_bloc" not in df.columns or df["part_bloc"].isna().all():
        df["part_bloc"] = df["voix_bloc"] / df["exprimes"]
    df["part_bloc"] = pd.to_numeric(df["part_bloc"], errors="coerce").clip(upper=1)
    df = df.dropna(subset=["bloc"])

    # --- Per-bloc share projection -------------------------------------
    # Only blocs that are both displayable and known candidate categories.
    bloc_order = [b for b in DISPLAY_BLOC_ORDER if b in CANDIDATE_CATEGORIES]
    raw_shares: dict[str, float] = {}
    for bloc in bloc_order:
        bloc_hist = df[df["bloc"] == bloc].sort_values("date_scrutin")
        last_overall = _last_share(bloc_hist, bloc)
        base_series = bloc_hist["part_bloc"]
        base_years = bloc_hist["annee"]
        # Prefer the same election type when it exists in this bloc's history.
        if not bloc_hist.empty and target_election in bloc_hist["type_scrutin"].values:
            base_series = bloc_hist[bloc_hist["type_scrutin"] == target_election]["part_bloc"]
            base_years = bloc_hist[bloc_hist["type_scrutin"] == target_election]["annee"]

        projected = _project_share(base_series, base_years, target_year)
        if projected is None and last_overall is not None:
            projected = last_overall
        predicted = _clip01(projected or 0.0)
        raw_shares[bloc] = predicted

    # Normalize raw per-bloc shares so they sum to 1 (or all-zero).
    share_values = np.array([raw_shares.get(b, 0.0) for b in bloc_order], dtype=float)
    share_sum = share_values.sum()
    if share_sum > 0:
        share_values = share_values / share_sum
    else:
        share_values = np.zeros_like(share_values)

    # --- Turnout projection --------------------------------------------
    # Collapse to one row per election event; "max" dedupes the repeated
    # per-bloc totals within an event.
    event_cols = [col for col in ["code_bv", "date_scrutin", "type_scrutin", "tour", "annee"] if col in df.columns]
    event_df = df.groupby(event_cols, as_index=False).agg(
        inscrits=("inscrits", "max"),
        votants=("votants", "max"),
        blancs=("blancs", "max"),
        nuls=("nuls", "max"),
    )
    if "date_scrutin" in event_df.columns:
        event_df = event_df.sort_values("date_scrutin")
    if "type_scrutin" not in event_df.columns:
        event_df["type_scrutin"] = ""
    if "annee" not in event_df.columns:
        if "date_scrutin" in event_df.columns:
            event_df["annee"] = pd.to_datetime(event_df["date_scrutin"], errors="coerce").dt.year
        else:
            event_df["annee"] = np.nan
    # Avoid division by zero: zero inscrits become NA and yield NA rates.
    base_inscrits = event_df["inscrits"].replace(0, pd.NA)
    event_df["taux_participation"] = event_df["votants"] / base_inscrits
    event_df["taux_blancs"] = event_df["blancs"] / base_inscrits
    event_df["taux_nuls"] = event_df["nuls"] / base_inscrits

    def _select_series(col: str) -> tuple[pd.Series, pd.Series]:
        # Restrict to first-round events when available, then to the target
        # election type when present; fall back to the full event history.
        scoped = event_df
        if "tour" in event_df.columns:
            round1 = event_df[event_df["tour"] == 1]
            if not round1.empty:
                scoped = round1
        if not scoped.empty and target_election in scoped["type_scrutin"].values:
            mask = scoped["type_scrutin"] == target_election
            return scoped.loc[mask, col], scoped.loc[mask, "annee"]
        return scoped[col], scoped["annee"]

    turnout_series, turnout_years = _select_series("taux_participation")
    blancs_series, blancs_years = _select_series("taux_blancs")
    nuls_series, nuls_years = _select_series("taux_nuls")

    taux_participation = _project_rate(turnout_series, turnout_years, target_year)
    taux_blancs = _project_rate(blancs_series, blancs_years, target_year)
    taux_nuls = _project_rate(nuls_series, nuls_years, target_year)

    # Registered voters: override wins if it parses to a positive float,
    # otherwise use the last observed value.
    inscrits_used = None
    if inscrits_override is not None:
        try:
            value = float(inscrits_override)
            if value > 0:
                inscrits_used = value
        except (TypeError, ValueError):
            inscrits_used = None
    if inscrits_used is None:
        inscrits_used = _last_value(event_df["inscrits"])
    if inscrits_used is None:
        return PredictionSummary([], None, None, None, None, None, None)

    if taux_participation is None:
        taux_participation = 0.0
    if taux_blancs is None:
        taux_blancs = 0.0
    if taux_nuls is None:
        taux_nuls = 0.0

    # Keep blancs + nuls within the projected turnout rate.
    if taux_blancs + taux_nuls > taux_participation and (taux_blancs + taux_nuls) > 0:
        scale = taux_participation / (taux_blancs + taux_nuls)
        taux_blancs *= scale
        taux_nuls *= scale

    inscrits_total = int(round(inscrits_used))
    votants_total = int(round(inscrits_total * taux_participation))
    blancs_total = int(round(inscrits_total * taux_blancs))
    nuls_total = int(round(inscrits_total * taux_nuls))
    # Re-check the invariant after integer rounding.
    if blancs_total + nuls_total > votants_total and (blancs_total + nuls_total) > 0:
        scale = votants_total / (blancs_total + nuls_total)
        blancs_total = int(round(blancs_total * scale))
        nuls_total = int(round(nuls_total * scale))
    exprimes_total = max(0, votants_total - blancs_total - nuls_total)
    abstention_total = max(0, inscrits_total - votants_total)

    # Distribute the expressed votes across blocs proportionally to the
    # normalized shares (largest-remainder rounding).
    bloc_counts = _allocate_counts(share_values.tolist(), exprimes_total)
    bloc_predictions: list[PredictionResult] = []
    for bloc, share, count in zip(bloc_order, share_values.tolist(), bloc_counts):
        bloc_predictions.append(
            PredictionResult(
                category=bloc,
                predicted_share=float(share),
                predicted_count=int(count),
            )
        )

    return PredictionSummary(
        bloc_predictions=bloc_predictions,
        inscrits=inscrits_total,
        votants=votants_total,
        blancs=blancs_total,
        nuls=nuls_total,
        abstention=abstention_total,
        exprimes=exprimes_total,
    )
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
def predictions_as_dataframe(summary: PredictionSummary) -> pd.DataFrame:
    """Flatten a PredictionSummary into a two-column table.

    Blocs appear first, ordered as in ``DISPLAY_BLOC_ORDER``, followed by
    "blancs", "nuls" and "abstention" whenever those totals are available.
    Returns an empty frame with the expected columns when *summary* is
    ``None`` or carries no bloc predictions.
    """
    if summary is None or not summary.bloc_predictions:
        return pd.DataFrame(columns=["categorie", "nombre"])
    by_category = {pred.category: pred for pred in summary.bloc_predictions}
    records = [
        {"categorie": bloc, "nombre": int(by_category[bloc].predicted_count)}
        for bloc in DISPLAY_BLOC_ORDER
        if bloc in by_category
    ]
    for label, total in (("blancs", summary.blancs), ("nuls", summary.nuls), ("abstention", summary.abstention)):
        if total is not None:
            records.append({"categorie": label, "nombre": int(total)})
    return pd.DataFrame(records)
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
# Public API of this module.
__all__ = ["compute_predictions", "predictions_as_dataframe", "PredictionResult", "PredictionSummary"]
|