stephmnt committed on
Commit
c12ec1a
·
verified ·
1 Parent(s): dfa5812

Sync from GitHub Actions

Browse files
.gitignore CHANGED
@@ -7,6 +7,7 @@ questions.md
7
  /reports/
8
  /data/external/
9
  /data/raw/
 
10
  /datasets/
11
  /data/processed/
12
  /data/contours-france-entiere-latest-v2.geojson
 
7
  /reports/
8
  /data/external/
9
  /data/raw/
10
+ /data/sociodemo/
11
  /datasets/
12
  /data/processed/
13
  /data/contours-france-entiere-latest-v2.geojson
app/gradio_app.py CHANGED
@@ -110,6 +110,14 @@ INTERVAL_BANDS = {
110
  "90% (p05-p95)": ("q05", "q95"),
111
  }
112
  NEUTRAL_MARGIN_SHARE = 0.10
 
 
 
 
 
 
 
 
113
 
114
  try:
115
  from numpy import RankWarning as NP_RANK_WARNING # type: ignore[attr-defined]
@@ -211,6 +219,41 @@ def build_interval_chart(
211
  return plt
212
 
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  def apply_transfers(
215
  counts: Dict[str, int],
216
  total_inscrits: int,
@@ -1091,6 +1134,7 @@ class PredictorBackend:
1091
  preds_share = preds.flatten()
1092
 
1093
  preds_by_cat = {cat: float(preds_share[idx]) for idx, cat in enumerate(CANDIDATE_CATEGORIES)}
 
1094
  ordered = ordered_categories()
1095
  share_vec = np.array([preds_by_cat.get(cat, 0.0) for cat in ordered], dtype=float)
1096
 
@@ -1306,7 +1350,7 @@ def create_interface() -> gr.Blocks:
1306
  Choisissez un bureau de vote et une élection cible.
1307
  Le modèle estime un volume par catégorie politique, ainsi que les abstentions, blancs et nuls.
1308
 
1309
- Auteur : [Stéphane Manet](https://manet-conseil.fr) - [Linkedin](https://www.linkedin.com/in/stephanemanet) | [GitHub](https://github.com/stephmnt)
1310
  """
1311
  )
1312
  with gr.Tabs():
 
110
  "90% (p05-p95)": ("q05", "q95"),
111
  }
112
  NEUTRAL_MARGIN_SHARE = 0.10
113
# Base blending weight per election type, keyed by the lower-cased election
# type name. blend_with_type_history() looks the target type up here and
# treats any type missing from the mapping as weight 0.0 (no blending).
# NOTE(review): src/model/predict.py declares a mapping with the same name
# and values — presumably the two must be kept in sync; confirm.
TYPE_HISTORY_BLEND = {
    "presidentielles": 0.4,
    "legislatives": 0.35,
    "europeennes": 0.3,
    "regionales": 0.3,
    "departementales": 0.3,
    "municipales": 0.2,
}
121
 
122
  try:
123
  from numpy import RankWarning as NP_RANK_WARNING # type: ignore[attr-defined]
 
219
  return plt
220
 
221
 
222
def blend_with_type_history(
    preds_by_cat: Dict[str, float],
    row: pd.Series,
    target_type: str,
) -> Dict[str, float]:
    """Blend predicted shares with the lagged shares of the same election type.

    Args:
        preds_by_cat: Predicted share per candidate category.
        row: Feature row; lagged shares are read from the
            ``prev_share_type_lag1_<category>`` entries when present.
        target_type: Election type; it is lower-cased and looked up in
            ``TYPE_HISTORY_BLEND`` to get the base blending weight.

    Returns:
        ``preds_by_cat`` itself when the type has no positive weight or no
        lagged share is available. Otherwise a new dict with one entry per
        ``CANDIDATE_CATEGORIES`` item, clamped to be non-negative and
        rescaled to sum to 1 whenever the total is positive.
    """
    base_weight = TYPE_HISTORY_BLEND.get(str(target_type).lower(), 0.0)
    if base_weight <= 0:
        return preds_by_cat

    # Keep only the categories that actually have a usable lagged share.
    hist_vals: Dict[str, float] = {}
    for cat in CANDIDATE_CATEGORIES:
        val = row.get(f"prev_share_type_lag1_{cat}")
        if val is not None and not pd.isna(val):
            hist_vals[cat] = float(val)
    if not hist_vals:
        return preds_by_cat

    # The less history is available, the less it is trusted overall.
    weight = base_weight * (len(hist_vals) / len(CANDIDATE_CATEGORIES))

    blended: Dict[str, float] = {}
    for cat in CANDIDATE_CATEGORIES:
        base = float(preds_by_cat.get(cat, 0.0))
        hist = hist_vals.get(cat)
        value = base if hist is None else (1 - weight) * base + weight * hist
        # Clamp negatives before renormalising, as the array-based variant of
        # this blend does; a negative share would otherwise skew the rescale.
        blended[cat] = max(value, 0.0)

    total = sum(blended.values())
    if total > 0:
        return {cat: value / total for cat, value in blended.items()}
    return blended
255
+
256
+
257
  def apply_transfers(
258
  counts: Dict[str, int],
259
  total_inscrits: int,
 
1134
  preds_share = preds.flatten()
1135
 
1136
  preds_by_cat = {cat: float(preds_share[idx]) for idx, cat in enumerate(CANDIDATE_CATEGORIES)}
1137
+ preds_by_cat = blend_with_type_history(preds_by_cat, row.iloc[0], target_type)
1138
  ordered = ordered_categories()
1139
  share_vec = np.array([preds_by_cat.get(cat, 0.0) for cat in ordered], dtype=float)
1140
 
 
1350
  Choisissez un bureau de vote et une élection cible.
1351
  Le modèle estime un volume par catégorie politique, ainsi que les abstentions, blancs et nuls.
1352
 
1353
+ Auteur : [Stéphane Manet](https://manet-conseil.fr) - [Linkedin](https://www.linkedin.com/in/stephanemanet) - [GitHub](https://github.com/stephmnt)
1354
  """
1355
  )
1356
  with gr.Tabs():
data/interim/elections_long.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70fc51d5dd8303c51339a95f818198ba0cc5f26e2a3dc951eae664eb8953a54d
3
- size 2216814
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:976ddedf341f8c4be434dc72e6246b0b8bb19cfede0efa3c7aa8865b47ca1c01
3
+ size 2398565
data/mapping_candidats_blocs.csv CHANGED
@@ -5,7 +5,7 @@ DIV;Divers;centre;;
5
  LDVD;Divers droite;droite_modere;droite_dure
6
  LDVG;Divers gauche;gauche_modere;
7
  LUG;Union de la gauche;;gauche_modere
8
- LUD;Union de la droite;droite;droite_modere
9
  LFN;Front national;extreme_droite;;
10
  LEXG;Extreme gauche;extreme_gauche;;
11
  LSOC;Parti socialiste;gauche_modere;
 
5
  LDVD;Divers droite;droite_modere;droite_dure
6
  LDVG;Divers gauche;gauche_modere;
7
  LUG;Union de la gauche;;gauche_modere
8
+ LUD;Union de la droite;droite_dure;droite_modere
9
  LFN;Front national;extreme_droite;;
10
  LEXG;Extreme gauche;extreme_gauche;;
11
  LSOC;Parti socialiste;gauche_modere;
src/model/predict.py CHANGED
@@ -20,6 +20,40 @@ from src.features.build_features import (
20
  )
21
 
22
  LOGGER = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
 
25
  def filter_history(df: pd.DataFrame, target_year: int, commune_code: str | None) -> pd.DataFrame:
@@ -141,6 +175,11 @@ def predict(
141
  sums = preds.sum(axis=1, keepdims=True)
142
  sums[sums == 0] = 1
143
  preds = preds / sums
 
 
 
 
 
144
  preds_pct = preds * 100
145
 
146
  rows = []
 
20
  )
21
 
22
  LOGGER = logging.getLogger(__name__)
23
# Base blending weight per election type, keyed by the lower-cased election
# type name. blend_with_type_history() looks the target type up here and
# treats any type missing from the mapping as weight 0.0 (no blending).
TYPE_HISTORY_BLEND = {
    "presidentielles": 0.4,
    "legislatives": 0.35,
    "europeennes": 0.3,
    "regionales": 0.3,
    "departementales": 0.3,
    "municipales": 0.2,
}
31
+
32
+
33
def blend_with_type_history(
    preds: np.ndarray,
    feature_df: pd.DataFrame,
    target_type: str,
) -> np.ndarray:
    """Blend predicted shares with the lagged shares of the same election type.

    Args:
        preds: Predicted shares, combined element-wise with the lagged-share
            columns of ``feature_df``.
        feature_df: Features holding one ``prev_share_type_lag1_<category>``
            column per entry of ``CANDIDATE_CATEGORIES``.
        target_type: Election type; it is lower-cased and looked up in
            ``TYPE_HISTORY_BLEND`` to get the base blending weight.

    Returns:
        ``preds`` unchanged when the type has no positive weight, ``preds`` is
        empty, a lagged column is missing, or every lagged value is NaN.
        Otherwise the blended shares, clipped at zero and rescaled so each
        row sums to 1 (rows summing to zero are left as zeros).
    """
    type_weight = TYPE_HISTORY_BLEND.get(str(target_type).lower(), 0.0)
    if type_weight <= 0 or preds.size == 0:
        return preds

    lag_columns = [f"prev_share_type_lag1_{cat}" for cat in CANDIDATE_CATEGORIES]
    if any(column not in feature_df.columns for column in lag_columns):
        return preds

    history = feature_df[lag_columns].to_numpy(dtype=float)
    missing = np.isnan(history)
    n_available = (~missing).sum(axis=1).astype(float)
    if np.nanmax(n_available) == 0:
        return preds

    # Per-row weight shrinks with the fraction of categories lacking history.
    row_weights = type_weight * (n_available / len(CANDIDATE_CATEGORIES)).reshape(-1, 1)

    # Where history is missing, fall back to the prediction so the blend is a no-op.
    filled_history = np.where(missing, preds, history)
    mixed = np.clip((1 - row_weights) * preds + row_weights * filled_history, 0, None)

    row_totals = mixed.sum(axis=1, keepdims=True)
    safe_totals = np.where(row_totals == 0, 1, row_totals)
    return mixed / safe_totals
57
 
58
 
59
  def filter_history(df: pd.DataFrame, target_year: int, commune_code: str | None) -> pd.DataFrame:
 
175
  sums = preds.sum(axis=1, keepdims=True)
176
  sums[sums == 0] = 1
177
  preds = preds / sums
178
+ target_type = None
179
+ if "election_type" in feature_df.columns and not feature_df.empty:
180
+ target_type = str(feature_df["election_type"].iloc[0])
181
+ if target_type:
182
+ preds = blend_with_type_history(preds, feature_df, target_type)
183
  preds_pct = preds * 100
184
 
185
  rows = []