Sync from GitHub Actions
Browse files- .gitignore +1 -0
- app/gradio_app.py +45 -1
- data/interim/elections_long.parquet +2 -2
- data/mapping_candidats_blocs.csv +1 -1
- src/model/predict.py +39 -0
.gitignore
CHANGED
|
@@ -7,6 +7,7 @@ questions.md
|
|
| 7 |
/reports/
|
| 8 |
/data/external/
|
| 9 |
/data/raw/
|
|
|
|
| 10 |
/datasets/
|
| 11 |
/data/processed/
|
| 12 |
/data/contours-france-entiere-latest-v2.geojson
|
|
|
|
| 7 |
/reports/
|
| 8 |
/data/external/
|
| 9 |
/data/raw/
|
| 10 |
+
/data/sociodemo/
|
| 11 |
/datasets/
|
| 12 |
/data/processed/
|
| 13 |
/data/contours-france-entiere-latest-v2.geojson
|
app/gradio_app.py
CHANGED
|
@@ -110,6 +110,14 @@ INTERVAL_BANDS = {
|
|
| 110 |
"90% (p05-p95)": ("q05", "q95"),
|
| 111 |
}
|
| 112 |
NEUTRAL_MARGIN_SHARE = 0.10
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
try:
|
| 115 |
from numpy import RankWarning as NP_RANK_WARNING # type: ignore[attr-defined]
|
|
@@ -211,6 +219,41 @@ def build_interval_chart(
|
|
| 211 |
return plt
|
| 212 |
|
| 213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
def apply_transfers(
|
| 215 |
counts: Dict[str, int],
|
| 216 |
total_inscrits: int,
|
|
@@ -1091,6 +1134,7 @@ class PredictorBackend:
|
|
| 1091 |
preds_share = preds.flatten()
|
| 1092 |
|
| 1093 |
preds_by_cat = {cat: float(preds_share[idx]) for idx, cat in enumerate(CANDIDATE_CATEGORIES)}
|
|
|
|
| 1094 |
ordered = ordered_categories()
|
| 1095 |
share_vec = np.array([preds_by_cat.get(cat, 0.0) for cat in ordered], dtype=float)
|
| 1096 |
|
|
@@ -1306,7 +1350,7 @@ def create_interface() -> gr.Blocks:
|
|
| 1306 |
Choisissez un bureau de vote et une élection cible.
|
| 1307 |
Le modèle estime un volume par catégorie politique, ainsi que les abstentions, blancs et nuls.
|
| 1308 |
|
| 1309 |
-
Auteur : [Stéphane Manet](https://manet-conseil.fr) - [Linkedin](https://www.linkedin.com/in/stephanemanet)
|
| 1310 |
"""
|
| 1311 |
)
|
| 1312 |
with gr.Tabs():
|
|
|
|
| 110 |
"90% (p05-p95)": ("q05", "q95"),
|
| 111 |
}
|
| 112 |
NEUTRAL_MARGIN_SHARE = 0.10
|
| 113 |
+
# Upper bound of the weight given to the `prev_share_type_lag1_*` history
# values when they are blended with the model output, keyed by lowercase
# election type. Types that are absent from this mapping are not blended.
TYPE_HISTORY_BLEND: Dict[str, float] = {
    "presidentielles": 0.4,
    "legislatives": 0.35,
    "europeennes": 0.3,
    "regionales": 0.3,
    "departementales": 0.3,
    "municipales": 0.2,
}
|
| 121 |
|
| 122 |
try:
|
| 123 |
from numpy import RankWarning as NP_RANK_WARNING # type: ignore[attr-defined]
|
|
|
|
| 219 |
return plt
|
| 220 |
|
| 221 |
|
| 222 |
+
def blend_with_type_history(
    preds_by_cat: Dict[str, float],
    row: pd.Series,
    target_type: str,
) -> Dict[str, float]:
    """Blend predicted shares with the `prev_share_type_lag1_*` history values.

    Args:
        preds_by_cat: Predicted share per category; categories missing from
            the mapping are treated as 0.0.
        row: Feature row that is looked up for one
            ``prev_share_type_lag1_<category>`` entry per candidate category.
        target_type: Election type; matched case-insensitively against
            ``TYPE_HISTORY_BLEND``.

    Returns:
        ``preds_by_cat`` itself (unchanged) when the election type has no
        positive blend weight or when no history value is available.
        Otherwise a new dict over ``CANDIDATE_CATEGORIES`` whose values are
        clipped at zero and rescaled to sum to 1 (left as-is when the clipped
        total is not positive).
    """
    base_weight = TYPE_HISTORY_BLEND.get(str(target_type).lower(), 0.0)
    if base_weight <= 0:
        return preds_by_cat

    # Keep only the categories that actually have a usable history value.
    hist_vals: Dict[str, float] = {}
    for cat in CANDIDATE_CATEGORIES:
        val = row.get(f"prev_share_type_lag1_{cat}")
        if val is not None and not pd.isna(val):
            hist_vals[cat] = float(val)
    if not hist_vals:
        return preds_by_cat

    # The less history coverage there is, the less it is trusted.
    weight = base_weight * (len(hist_vals) / len(CANDIDATE_CATEGORIES))

    blended: Dict[str, float] = {}
    for cat in CANDIDATE_CATEGORIES:
        base = float(preds_by_cat.get(cat, 0.0))
        hist = hist_vals.get(cat)
        value = base if hist is None else (1 - weight) * base + weight * hist
        # Clip negatives before renormalizing so no category ends up with a
        # negative share (same rule as the array-based variant in
        # src/model/predict.py).
        blended[cat] = max(value, 0.0)

    total = sum(blended.values())
    if total > 0:
        blended = {cat: value / total for cat, value in blended.items()}
    return blended
|
| 255 |
+
|
| 256 |
+
|
| 257 |
def apply_transfers(
|
| 258 |
counts: Dict[str, int],
|
| 259 |
total_inscrits: int,
|
|
|
|
| 1134 |
preds_share = preds.flatten()
|
| 1135 |
|
| 1136 |
preds_by_cat = {cat: float(preds_share[idx]) for idx, cat in enumerate(CANDIDATE_CATEGORIES)}
|
| 1137 |
+
preds_by_cat = blend_with_type_history(preds_by_cat, row.iloc[0], target_type)
|
| 1138 |
ordered = ordered_categories()
|
| 1139 |
share_vec = np.array([preds_by_cat.get(cat, 0.0) for cat in ordered], dtype=float)
|
| 1140 |
|
|
|
|
| 1350 |
Choisissez un bureau de vote et une élection cible.
|
| 1351 |
Le modèle estime un volume par catégorie politique, ainsi que les abstentions, blancs et nuls.
|
| 1352 |
|
| 1353 |
+
Auteur : [Stéphane Manet](https://manet-conseil.fr) - [Linkedin](https://www.linkedin.com/in/stephanemanet) - [GitHub](https://github.com/stephmnt)
|
| 1354 |
"""
|
| 1355 |
)
|
| 1356 |
with gr.Tabs():
|
data/interim/elections_long.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:976ddedf341f8c4be434dc72e6246b0b8bb19cfede0efa3c7aa8865b47ca1c01
|
| 3 |
+
size 2398565
|
data/mapping_candidats_blocs.csv
CHANGED
|
@@ -5,7 +5,7 @@ DIV;Divers;centre;;
|
|
| 5 |
LDVD;Divers droite;droite_modere;droite_dure
|
| 6 |
LDVG;Divers gauche;gauche_modere;
|
| 7 |
LUG;Union de la gauche;;gauche_modere
|
| 8 |
-
LUD;Union de la droite;
|
| 9 |
LFN;Front national;extreme_droite;;
|
| 10 |
LEXG;Extreme gauche;extreme_gauche;;
|
| 11 |
LSOC;Parti socialiste;gauche_modere;
|
|
|
|
| 5 |
LDVD;Divers droite;droite_modere;droite_dure
|
| 6 |
LDVG;Divers gauche;gauche_modere;
|
| 7 |
LUG;Union de la gauche;;gauche_modere
|
| 8 |
+
LUD;Union de la droite;droite_dure;droite_modere
|
| 9 |
LFN;Front national;extreme_droite;;
|
| 10 |
LEXG;Extreme gauche;extreme_gauche;;
|
| 11 |
LSOC;Parti socialiste;gauche_modere;
|
src/model/predict.py
CHANGED
|
@@ -20,6 +20,40 @@ from src.features.build_features import (
|
|
| 20 |
)
|
| 21 |
|
| 22 |
LOGGER = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def filter_history(df: pd.DataFrame, target_year: int, commune_code: str | None) -> pd.DataFrame:
|
|
@@ -141,6 +175,11 @@ def predict(
|
|
| 141 |
sums = preds.sum(axis=1, keepdims=True)
|
| 142 |
sums[sums == 0] = 1
|
| 143 |
preds = preds / sums
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
preds_pct = preds * 100
|
| 145 |
|
| 146 |
rows = []
|
|
|
|
| 20 |
)
|
| 21 |
|
| 22 |
LOGGER = logging.getLogger(__name__)
|
| 23 |
+
# Upper bound of the weight given to the `prev_share_type_lag1_*` history
# columns when they are blended with the model output, keyed by lowercase
# election type. Types that are absent from this mapping are not blended.
TYPE_HISTORY_BLEND: dict = {
    "presidentielles": 0.4,
    "legislatives": 0.35,
    "europeennes": 0.3,
    "regionales": 0.3,
    "departementales": 0.3,
    "municipales": 0.2,
}
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def blend_with_type_history(
    preds: np.ndarray,
    feature_df: pd.DataFrame,
    target_type: str,
) -> np.ndarray:
    """Blend predicted shares with the `prev_share_type_lag1_*` columns.

    Args:
        preds: Predicted shares; combined element-wise with the history
            columns, so presumably shaped (rows of ``feature_df``,
            ``len(CANDIDATE_CATEGORIES)``) - confirm against the caller.
        feature_df: Frame expected to hold one
            ``prev_share_type_lag1_<category>`` column per candidate category.
        target_type: Election type; matched case-insensitively against
            ``TYPE_HISTORY_BLEND``.

    Returns:
        ``preds`` itself when the type has no positive blend weight, ``preds``
        is empty, any history column is missing, or every history value is
        NaN. Otherwise a new array, clipped at zero, with each row rescaled to
        sum to 1 (rows summing to zero are left as zeros).
    """
    max_weight = TYPE_HISTORY_BLEND.get(str(target_type).lower(), 0.0)
    if max_weight <= 0 or preds.size == 0:
        return preds

    lag_columns = [f"prev_share_type_lag1_{cat}" for cat in CANDIDATE_CATEGORIES]
    if any(col not in feature_df.columns for col in lag_columns):
        return preds

    history = feature_df[lag_columns].to_numpy(dtype=float)
    missing = np.isnan(history)
    n_known = (~missing).sum(axis=1).astype(float)
    if np.nanmax(n_known) == 0:
        return preds

    # Per-row weight shrinks with the fraction of categories that have history.
    row_weights = max_weight * (n_known / len(CANDIDATE_CATEGORIES)).reshape(-1, 1)
    # Where history is missing, fall back to the prediction so that cell is
    # left unchanged by the blend.
    history = np.where(missing, preds, history)
    mixed = np.clip((1 - row_weights) * preds + row_weights * history, 0, None)

    row_totals = mixed.sum(axis=1, keepdims=True)
    row_totals[row_totals == 0] = 1  # avoid dividing all-zero rows by zero
    return mixed / row_totals
|
| 57 |
|
| 58 |
|
| 59 |
def filter_history(df: pd.DataFrame, target_year: int, commune_code: str | None) -> pd.DataFrame:
|
|
|
|
| 175 |
sums = preds.sum(axis=1, keepdims=True)
|
| 176 |
sums[sums == 0] = 1
|
| 177 |
preds = preds / sums
|
| 178 |
+
target_type = None
|
| 179 |
+
if "election_type" in feature_df.columns and not feature_df.empty:
|
| 180 |
+
target_type = str(feature_df["election_type"].iloc[0])
|
| 181 |
+
if target_type:
|
| 182 |
+
preds = blend_with_type_history(preds, feature_df, target_type)
|
| 183 |
preds_pct = preds * 100
|
| 184 |
|
| 185 |
rows = []
|