Spaces:

stephmnt
/

bdv

Sleeping

App Files Files Community

stephmnt committed on Jan 6

Commit

c12ec1a

verified ·

1 Parent(s): dfa5812

Sync from GitHub Actions

Browse files

Files changed (5) hide show

.gitignore +1 -0
app/gradio_app.py +45 -1
data/interim/elections_long.parquet +2 -2
data/mapping_candidats_blocs.csv +1 -1
src/model/predict.py +39 -0

.gitignore CHANGED Viewed

@@ -7,6 +7,7 @@ questions.md
 /reports/
 /data/external/
 /data/raw/
 /datasets/
 /data/processed/
 /data/contours-france-entiere-latest-v2.geojson

 /reports/
 /data/external/
 /data/raw/
+/data/sociodemo/
 /datasets/
 /data/processed/
 /data/contours-france-entiere-latest-v2.geojson

app/gradio_app.py CHANGED Viewed

@@ -110,6 +110,14 @@ INTERVAL_BANDS = {
     "90% (p05-p95)": ("q05", "q95"),
 }
 NEUTRAL_MARGIN_SHARE = 0.10
 try:
     from numpy import RankWarning as NP_RANK_WARNING  # type: ignore[attr-defined]
@@ -211,6 +219,41 @@ def build_interval_chart(
     return plt
 def apply_transfers(
     counts: Dict[str, int],
     total_inscrits: int,
@@ -1091,6 +1134,7 @@ class PredictorBackend:
         preds_share = preds.flatten()
         preds_by_cat = {cat: float(preds_share[idx]) for idx, cat in enumerate(CANDIDATE_CATEGORIES)}
         ordered = ordered_categories()
         share_vec = np.array([preds_by_cat.get(cat, 0.0) for cat in ordered], dtype=float)
@@ -1306,7 +1350,7 @@ def create_interface() -> gr.Blocks:
             Choisissez un bureau de vote et une élection cible.
             Le modèle estime un volume par catégorie politique, ainsi que les abstentions, blancs et nuls.
-            Auteur : [Stéphane Manet](https://manet-conseil.fr) - [Linkedin](https://www.linkedin.com/in/stephanemanet) | [GitHub](https://github.com/stephmnt)
             """
         )
         with gr.Tabs():

     "90% (p05-p95)": ("q05", "q95"),
 }
 NEUTRAL_MARGIN_SHARE = 0.10
+# Base blend weight per election type, looked up with the lower-cased target
+# type. It is the share of the final estimate taken from the
+# ``prev_share_type_lag1_<cat>`` history value rather than from the model
+# prediction; types missing from this table get 0.0, i.e. no blending.
+TYPE_HISTORY_BLEND = {
+    "presidentielles": 0.4,
+    "legislatives": 0.35,
+    "europeennes": 0.3,
+    "regionales": 0.3,
+    "departementales": 0.3,
+    "municipales": 0.2,
+}
 try:
     from numpy import RankWarning as NP_RANK_WARNING  # type: ignore[attr-defined]
     return plt
+def blend_with_type_history(
+    preds_by_cat: Dict[str, float],
+    row: pd.Series,
+    target_type: str,
+) -> Dict[str, float]:
+    """Blend predicted shares with the row's ``prev_share_type_lag1_*`` values.
+
+    Args:
+        preds_by_cat: Predicted share per category; categories absent from the
+            mapping are treated as 0.0.
+        row: Feature row read through ``row.get("prev_share_type_lag1_<cat>")``
+            for every category in ``CANDIDATE_CATEGORIES``.
+        target_type: Election type, looked up lower-cased in
+            ``TYPE_HISTORY_BLEND``.
+
+    Returns:
+        ``preds_by_cat`` itself when the type has no positive blend weight or
+        when no history value is available; otherwise a new dict covering
+        ``CANDIDATE_CATEGORIES`` with non-negative shares, normalized to sum to
+        1 whenever their total is positive.
+    """
+    base_weight = TYPE_HISTORY_BLEND.get(str(target_type).lower(), 0.0)
+    if base_weight <= 0:
+        return preds_by_cat
+    # Keep only the categories that actually have a usable history value.
+    hist_vals: Dict[str, float] = {}
+    for cat in CANDIDATE_CATEGORIES:
+        val = row.get(f"prev_share_type_lag1_{cat}")
+        if val is not None and not pd.isna(val):
+            hist_vals[cat] = float(val)
+    if not hist_vals:
+        return preds_by_cat
+    # Trust history less when it only covers part of the categories.
+    weight = base_weight * (len(hist_vals) / len(CANDIDATE_CATEGORIES))
+    blended: Dict[str, float] = {}
+    for cat in CANDIDATE_CATEGORIES:
+        base = float(preds_by_cat.get(cat, 0.0))
+        hist = hist_vals.get(cat)
+        value = base if hist is None else (1 - weight) * base + weight * hist
+        # Clamp negatives before renormalizing, as the array-based version in
+        # src/model/predict.py does, so a negative share cannot skew the total.
+        blended[cat] = max(value, 0.0)
+    total = sum(blended.values())
+    if total > 0:
+        blended = {cat: value / total for cat, value in blended.items()}
+    return blended
 def apply_transfers(
     counts: Dict[str, int],
     total_inscrits: int,
         preds_share = preds.flatten()
         preds_by_cat = {cat: float(preds_share[idx]) for idx, cat in enumerate(CANDIDATE_CATEGORIES)}
+        preds_by_cat = blend_with_type_history(preds_by_cat, row.iloc[0], target_type)
         ordered = ordered_categories()
         share_vec = np.array([preds_by_cat.get(cat, 0.0) for cat in ordered], dtype=float)
             Choisissez un bureau de vote et une élection cible.
             Le modèle estime un volume par catégorie politique, ainsi que les abstentions, blancs et nuls.
+            Auteur : [Stéphane Manet](https://manet-conseil.fr) - [Linkedin](https://www.linkedin.com/in/stephanemanet) - [GitHub](https://github.com/stephmnt)
             """
         )
         with gr.Tabs():

data/interim/elections_long.parquet CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70fc51d5dd8303c51339a95f818198ba0cc5f26e2a3dc951eae664eb8953a54d
-size 2216814

 version https://git-lfs.github.com/spec/v1
+oid sha256:976ddedf341f8c4be434dc72e6246b0b8bb19cfede0efa3c7aa8865b47ca1c01
+size 2398565

data/mapping_candidats_blocs.csv CHANGED Viewed

@@ -5,7 +5,7 @@ DIV;Divers;centre;;
 LDVD;Divers droite;droite_modere;droite_dure
 LDVG;Divers gauche;gauche_modere;
 LUG;Union de la gauche;;gauche_modere
-LUD;Union de la droite;droite;droite_modere
 LFN;Front national;extreme_droite;;
 LEXG;Extreme gauche;extreme_gauche;;
 LSOC;Parti socialiste;gauche_modere;

 LDVD;Divers droite;droite_modere;droite_dure
 LDVG;Divers gauche;gauche_modere;
 LUG;Union de la gauche;;gauche_modere
+LUD;Union de la droite;droite_dure;droite_modere
 LFN;Front national;extreme_droite;;
 LEXG;Extreme gauche;extreme_gauche;;
 LSOC;Parti socialiste;gauche_modere;

src/model/predict.py CHANGED Viewed

@@ -20,6 +20,40 @@ from src.features.build_features import (
 )
 LOGGER = logging.getLogger(__name__)
 def filter_history(df: pd.DataFrame, target_year: int, commune_code: str | None) -> pd.DataFrame:
@@ -141,6 +175,11 @@ def predict(
     sums = preds.sum(axis=1, keepdims=True)
     sums[sums == 0] = 1
     preds = preds / sums
     preds_pct = preds * 100
     rows = []

 )
 LOGGER = logging.getLogger(__name__)
+# Base blend weight per election type, looked up with the lower-cased target
+# type. It is the share of the final estimate taken from the
+# ``prev_share_type_lag1_<cat>`` feature columns rather than from the model
+# prediction; types missing from this table get 0.0, i.e. no blending.
+TYPE_HISTORY_BLEND = {
+    "presidentielles": 0.4,
+    "legislatives": 0.35,
+    "europeennes": 0.3,
+    "regionales": 0.3,
+    "departementales": 0.3,
+    "municipales": 0.2,
+}
+def blend_with_type_history(
+    preds: np.ndarray,
+    feature_df: pd.DataFrame,
+    target_type: str,
+) -> np.ndarray:
+    """Mix predicted shares with the ``prev_share_type_lag1_*`` feature columns.
+
+    Args:
+        preds: Predicted shares. Combined element-wise with the history
+            columns, so it is assumed to have one row per row of
+            ``feature_df`` and one column per ``CANDIDATE_CATEGORIES`` entry
+            (TODO confirm against the caller).
+        feature_df: Feature frame expected to hold one
+            ``prev_share_type_lag1_<cat>`` column per category.
+        target_type: Election type, looked up lower-cased in
+            ``TYPE_HISTORY_BLEND``.
+
+    Returns:
+        ``preds`` unchanged when the type has no positive weight, ``preds`` is
+        empty, a history column is missing, or no row has any history value.
+        Otherwise the blended shares, clipped at 0 and normalized per row
+        (rows summing to 0 are left as zeros).
+    """
+    type_weight = TYPE_HISTORY_BLEND.get(str(target_type).lower(), 0.0)
+    if preds.size == 0 or type_weight <= 0:
+        return preds
+
+    lag_columns = [f"prev_share_type_lag1_{cat}" for cat in CANDIDATE_CATEGORIES]
+    if any(name not in feature_df.columns for name in lag_columns):
+        return preds
+
+    history = feature_df[lag_columns].to_numpy(dtype=float)
+    missing = np.isnan(history)
+    n_available = (~missing).sum(axis=1).astype(float)
+    if np.nanmax(n_available) == 0:
+        return preds
+
+    # Per-row weight, shrunk by the fraction of categories that have history.
+    row_weights = type_weight * (n_available / len(CANDIDATE_CATEGORIES)).reshape(-1, 1)
+    # Where history is missing, fall back to the prediction so that cell is
+    # left unchanged by the blend.
+    history_filled = np.where(missing, preds, history)
+    mixed = np.clip((1 - row_weights) * preds + row_weights * history_filled, 0, None)
+
+    row_totals = mixed.sum(axis=1, keepdims=True)
+    row_totals[row_totals == 0] = 1  # avoid dividing all-zero rows by zero
+    return mixed / row_totals
 def filter_history(df: pd.DataFrame, target_year: int, commune_code: str | None) -> pd.DataFrame:
     sums = preds.sum(axis=1, keepdims=True)
     sums[sums == 0] = 1
     preds = preds / sums
+    target_type = None
+    if "election_type" in feature_df.columns and not feature_df.empty:
+        target_type = str(feature_df["election_type"].iloc[0])
+    if target_type:
+        preds = blend_with_type_history(preds, feature_df, target_type)
     preds_pct = preds * 100
     rows = []