bdv / app /gradio_app.py
stephmnt's picture
Sync from GitHub Actions
047370a verified
from __future__ import annotations
import base64
import io
import json
import logging
import re
import warnings
from html import escape
from pathlib import Path
from typing import Dict, Tuple
import gradio as gr
import joblib
import numpy as np
import pandas as pd
import sqlalchemy as sa
from src.constants import CANDIDATE_CATEGORIES
from src.db.schema import get_engine
from src.features.build_features import (
aggregate_by_event,
compute_national_reference,
expand_by_category,
load_elections_long,
load_mapping,
)
LOGGER = logging.getLogger(__name__)
COMMUNE_CODE_SETE = "34301"
MODEL_DIR = Path("models")
FEATURE_COLS_PATH = MODEL_DIR / "feature_columns.json"
RESIDUAL_INTERVALS_PATH = Path("reports/residual_intervals.json")
GEO_DIR = Path("data/geo")
DEFAULT_TARGETS = [
("municipales", 2026),
("legislatives", 2027),
("presidentielles", 2027),
]
FEATURE_CACHE: Dict[Tuple[str, int], Tuple[pd.DataFrame, Dict[str, Dict[Tuple[str, str], float]]]] = {}
ELECTION_KEY_SEP = "|"
ELECTION_TYPE_LABELS = {
"municipales": "Municipales",
"legislatives": "Législatives",
"presidentielles": "Présidentielles",
"europeennes": "Européennes",
"regionales": "Régionales",
"departementales": "Départementales",
}
HISTORY_OUTPUT_COLUMNS = ["categorie", "score_%"]
PREDICTION_OUTPUT_COLUMNS = ["categorie", "nombre"]
INTERVAL_OUTPUT_COLUMNS = ["categorie", "baseline_%", "min_%", "max_%", "baseline", "min", "max"]
SIM_OUTPUT_COLUMNS = ["categorie", "baseline", "apres_transfert", "delta"]
OPPORTUNITY_OUTPUT_COLUMNS = [
"bureau",
"gain_cible",
"score_base",
"score_apres",
"top_base",
"top_apres",
"bascule",
]
DISPLAY_CATEGORY_ORDER = [
"extreme_gauche",
"gauche_dure",
"gauche_modere",
"centre",
"droite_modere",
"droite_dure",
"extreme_droite",
]
PREDICTION_CATEGORY_ORDER = DISPLAY_CATEGORY_ORDER + ["blancs", "nuls", "abstention"]
DISPLAY_CATEGORY_LABELS = {
"extreme_gauche": "extrême-gauche",
"gauche_dure": "gauche dure",
"gauche_modere": "gauche modérée",
"centre": "centre",
"droite_modere": "droite modérée",
"droite_dure": "droite dure",
"extreme_droite": "extrême-droite",
"blancs": "blancs",
"nuls": "nuls",
"abstention": "abstentions",
}
DISPLAY_CATEGORY_COLORS = {
"extreme_gauche": "#7f1d1d",
"gauche_dure": "#dc2626",
"gauche_modere": "#f472b6",
"centre": "#facc15",
"droite_modere": "#60a5fa",
"droite_dure": "#1e3a8a",
"extreme_droite": "#111827",
}
EXTRA_CATEGORY_COLORS = {
"blancs": "#e5e7eb",
"nuls": "#9ca3af",
"abstention": "#6b7280",
}
DISPLAY_LABEL_COLORS = {
DISPLAY_CATEGORY_LABELS[key]: color for key, color in DISPLAY_CATEGORY_COLORS.items()
}
DISPLAY_LABEL_COLORS.update(
{DISPLAY_CATEGORY_LABELS[key]: color for key, color in EXTRA_CATEGORY_COLORS.items()}
)
CATEGORY_LABEL_TO_KEY = {label: key for key, label in DISPLAY_CATEGORY_LABELS.items()}
TRANSFER_CATEGORY_LABELS = [DISPLAY_CATEGORY_LABELS[key] for key in PREDICTION_CATEGORY_ORDER]
DEFAULT_RESIDUAL_SPREAD = 0.03
INTERVAL_BANDS = {
"80% (p10-p90)": ("q10", "q90"),
"90% (p05-p95)": ("q05", "q95"),
}
NEUTRAL_MARGIN_SHARE = 0.10
TYPE_HISTORY_BLEND = {
"presidentielles": 0.4,
"legislatives": 0.35,
"europeennes": 0.3,
"regionales": 0.3,
"departementales": 0.3,
"municipales": 0.2,
}
try:
from numpy import RankWarning as NP_RANK_WARNING # type: ignore[attr-defined]
except Exception:
class NP_RANK_WARNING(UserWarning):
pass
def ordered_categories() -> list[str]:
return [cat for cat in DISPLAY_CATEGORY_ORDER if cat in CANDIDATE_CATEGORIES]
def load_residual_intervals(path: Path = RESIDUAL_INTERVALS_PATH) -> Dict[str, object]:
if not path.exists():
return {}
try:
payload = json.loads(path.read_text())
except Exception:
return {}
if isinstance(payload, dict):
return payload
return {}
def get_interval_bounds(
residuals: Dict[str, Dict[str, float]],
category: str,
band_label: str,
) -> Tuple[float, float]:
keys = INTERVAL_BANDS.get(band_label, ("q10", "q90"))
cat_resid = residuals.get(category, {})
low = cat_resid.get(keys[0])
high = cat_resid.get(keys[1])
if low is None or high is None:
return -DEFAULT_RESIDUAL_SPREAD, DEFAULT_RESIDUAL_SPREAD
return float(low), float(high)
def build_interval_table(
shares_by_cat: Dict[str, float],
exprimes_total: int,
residuals: Dict[str, Dict[str, float]],
band_label: str,
) -> pd.DataFrame:
rows = []
for cat in ordered_categories():
share = float(shares_by_cat.get(cat, 0.0))
low_resid, high_resid = get_interval_bounds(residuals, cat, band_label)
share_low = float(np.clip(share + low_resid, 0.0, 1.0))
share_high = float(np.clip(share + high_resid, 0.0, 1.0))
count = int(round(share * exprimes_total))
count_low = int(round(share_low * exprimes_total))
count_high = int(round(share_high * exprimes_total))
if count_low > count_high:
count_low, count_high = count_high, count_low
share_low, share_high = share_high, share_low
rows.append(
{
"categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat),
"baseline_%": round(share * 100, 1),
"min_%": round(share_low * 100, 1),
"max_%": round(share_high * 100, 1),
"baseline": count,
"min": count_low,
"max": count_high,
}
)
return pd.DataFrame(rows, columns=INTERVAL_OUTPUT_COLUMNS)
def build_interval_chart(
df: pd.DataFrame,
*,
value_col: str = "baseline",
low_col: str = "min",
high_col: str = "max",
color_map: Dict[str, str] | None = None,
ylabel: str = "Nombre d'électeurs",
):
try:
import matplotlib.pyplot as plt
except Exception:
return None
if df.empty or value_col not in df.columns:
return None
labels = df["categorie"].astype(str).tolist()
values = df[value_col].astype(float).to_numpy()
low_vals = df[low_col].astype(float).to_numpy()
high_vals = df[high_col].astype(float).to_numpy()
lower_err = np.maximum(0.0, values - low_vals)
upper_err = np.maximum(0.0, high_vals - values)
yerr = np.vstack([lower_err, upper_err])
colors = [color_map.get(label, "#3b82f6") for label in labels] if color_map else "#3b82f6"
plt.figure(figsize=(6, 3))
plt.bar(labels, values, color=colors, yerr=yerr, capsize=4)
plt.xticks(rotation=30, ha="right")
plt.ylabel(ylabel)
plt.tight_layout()
return plt
def blend_with_type_history(
preds_by_cat: Dict[str, float],
row: pd.Series,
target_type: str,
) -> Dict[str, float]:
base_weight = TYPE_HISTORY_BLEND.get(str(target_type).lower(), 0.0)
if base_weight <= 0:
return preds_by_cat
available = 0
hist_vals: Dict[str, float | None] = {}
for cat in CANDIDATE_CATEGORIES:
val = row.get(f"prev_share_type_lag1_{cat}")
if val is not None and not pd.isna(val):
hist_vals[cat] = float(val)
available += 1
else:
hist_vals[cat] = None
if available == 0:
return preds_by_cat
weight = base_weight * (available / len(CANDIDATE_CATEGORIES))
blended: Dict[str, float] = {}
for cat in CANDIDATE_CATEGORIES:
base = float(preds_by_cat.get(cat, 0.0))
hist = hist_vals.get(cat)
if hist is None:
blended[cat] = base
else:
blended[cat] = (1 - weight) * base + weight * hist
total = sum(blended.values())
if total > 0:
for cat in blended:
blended[cat] /= total
return blended
def _normalize_override_pct(value: float | None) -> float | None:
if value is None:
return None
try:
val = float(value)
except (TypeError, ValueError):
return None
if np.isnan(val):
return None
return float(np.clip(val, 0.0, 100.0))
def apply_share_overrides(
preds_by_cat: Dict[str, float],
overrides_pct: Dict[str, float] | None,
ordered: list[str],
) -> Dict[str, float]:
if not overrides_pct:
return preds_by_cat
fixed = {}
for cat, pct in overrides_pct.items():
if cat not in ordered:
continue
norm = _normalize_override_pct(pct)
if norm is None:
continue
fixed[cat] = norm / 100.0
if not fixed:
return preds_by_cat
fixed_sum = sum(fixed.values())
if fixed_sum >= 1.0:
scaled = {cat: (val / fixed_sum) for cat, val in fixed.items() if fixed_sum > 0}
return {cat: float(scaled.get(cat, 0.0)) for cat in ordered}
remaining = 1.0 - fixed_sum
residual_cats = [cat for cat in ordered if cat not in fixed]
base_sum = sum(float(preds_by_cat.get(cat, 0.0)) for cat in residual_cats)
if base_sum <= 0 and residual_cats:
per_cat = remaining / len(residual_cats)
base_alloc = {cat: per_cat for cat in residual_cats}
else:
base_alloc = {
cat: (float(preds_by_cat.get(cat, 0.0)) / base_sum) * remaining
for cat in residual_cats
}
merged = {cat: float(base_alloc.get(cat, 0.0)) for cat in ordered}
for cat, val in fixed.items():
merged[cat] = float(val)
return merged
def apply_transfers(
counts: Dict[str, int],
total_inscrits: int,
transfers: list[Tuple[str, str, float]],
) -> Dict[str, int]:
updated = {key: int(value) for key, value in counts.items()}
for source, target, delta_pct in transfers:
if delta_pct <= 0 or source == target:
continue
delta_count = int(round(total_inscrits * float(delta_pct) / 100.0))
if delta_count <= 0:
continue
available = max(0, int(updated.get(source, 0)))
moved = min(available, delta_count)
updated[source] = available - moved
updated[target] = int(updated.get(target, 0)) + moved
return updated
def build_simulation_table(
baseline: Dict[str, int],
updated: Dict[str, int],
) -> pd.DataFrame:
rows = []
for cat in PREDICTION_CATEGORY_ORDER:
base = int(baseline.get(cat, 0))
new = int(updated.get(cat, 0))
rows.append(
{
"categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat),
"baseline": base,
"apres_transfert": new,
"delta": new - base,
}
)
return pd.DataFrame(rows, columns=SIM_OUTPUT_COLUMNS)
def load_geojson_features(geo_dir: Path = GEO_DIR) -> list[dict]:
if not geo_dir.exists():
return []
paths = sorted(geo_dir.glob("*.geojson")) + sorted(geo_dir.glob("*.json"))
features: list[dict] = []
for path in paths:
try:
payload = json.loads(path.read_text())
except Exception:
continue
if isinstance(payload, dict):
features.extend(payload.get("features", []))
return features
def extract_bureau_number(label: str | None) -> int | None:
if not label:
return None
match = re.search(r"(\d+)", str(label))
if not match:
return None
try:
return int(match.group(1))
except ValueError:
return None
def match_bureau_code(commune_code: str, bureau_num: int, available_codes: set[str]) -> str:
padded = str(bureau_num).zfill(4)
candidates = [f"{commune_code}-{padded}", f"{commune_code}{padded}"]
for candidate in candidates:
if candidate in available_codes:
return candidate
return candidates[-1]
def _iter_coords(geom: dict) -> list[Tuple[float, float]]:
coords = []
geom_type = geom.get("type")
if geom_type == "Polygon":
for ring in geom.get("coordinates", []):
coords.extend([(lon, lat) for lon, lat in ring])
elif geom_type == "MultiPolygon":
for polygon in geom.get("coordinates", []):
for ring in polygon:
coords.extend([(lon, lat) for lon, lat in ring])
return coords
def geojson_bounds(features: list[dict]) -> Tuple[Tuple[float, float], Tuple[float, float]] | None:
lons = []
lats = []
for feature in features:
geom = feature.get("geometry") or {}
for lon, lat in _iter_coords(geom):
lons.append(lon)
lats.append(lat)
if not lons or not lats:
return None
return (min(lats), min(lons)), (max(lats), max(lons))
def build_prediction_table_from_counts(counts_by_cat: Dict[str, int]) -> pd.DataFrame:
rows = []
for cat in ordered_categories():
rows.append({"categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat), "nombre": int(counts_by_cat.get(cat, 0))})
for extra in ["blancs", "nuls", "abstention"]:
rows.append(
{
"categorie": DISPLAY_CATEGORY_LABELS[extra],
"nombre": int(counts_by_cat.get(extra, 0)),
}
)
return pd.DataFrame(rows, columns=PREDICTION_OUTPUT_COLUMNS)
def chart_base64_from_df(
df: pd.DataFrame,
value_col: str,
ylabel: str,
color_map: Dict[str, str],
) -> str | None:
try:
import matplotlib.pyplot as plt
except Exception:
return None
if df.empty or value_col not in df.columns:
return None
labels = df["categorie"].astype(str).tolist()
values = pd.to_numeric(df[value_col], errors="coerce").fillna(0).tolist()
colors = [color_map.get(label, "#3b82f6") for label in labels]
fig, ax = plt.subplots(figsize=(4.5, 3.2))
ax.barh(labels, values, color=colors)
ax.invert_yaxis()
ax.set_xlabel(ylabel)
ax.tick_params(axis="y", labelsize=8)
fig.tight_layout()
buf = io.BytesIO()
fig.savefig(buf, format="png", dpi=150)
plt.close(fig)
return base64.b64encode(buf.getvalue()).decode("ascii")
def build_map_popup_html(
bureau_label: str,
table_df: pd.DataFrame,
chart_b64: str | None,
meta: str | None,
) -> str:
title_html = f"<strong>{escape(bureau_label)}</strong>"
meta_html = f"<div style='margin:4px 0;'>{escape(meta)}</div>" if meta else ""
table_html = table_df.to_html(index=False, border=0)
img_html = ""
if chart_b64:
img_html = (
"<div style='margin-top:6px;'>"
f"<img src='data:image/png;base64,{chart_b64}' style='width:320px;height:auto;'/>"
"</div>"
)
return f"<div style='font-size:12px;'>{title_html}{meta_html}{table_html}{img_html}</div>"
def build_map_legend_html() -> str:
parts = []
for key in DISPLAY_CATEGORY_ORDER:
label = DISPLAY_CATEGORY_LABELS.get(key, key)
color = DISPLAY_CATEGORY_COLORS.get(key, "#9ca3af")
parts.append(
f"<span style='display:inline-flex;align-items:center;margin-right:10px;'>"
f"<span style='width:12px;height:12px;background:{color};display:inline-block;margin-right:6px;border:1px solid #111827;'></span>"
f"{escape(label)}</span>"
)
parts.append(
"<span style='display:inline-flex;align-items:center;margin-right:10px;'>"
"<span style='width:12px;height:12px;background:#ffffff;display:inline-block;margin-right:6px;border:1px solid #111827;'></span>"
"écart gauche/droite ≤ 10%</span>"
)
parts.append(
"<span style='display:inline-flex;align-items:center;margin-right:10px;'>"
"<span style='width:12px;height:12px;background:#9ca3af;display:inline-block;margin-right:6px;border:1px solid #111827;'></span>"
"données indisponibles</span>"
)
parts.append("<span style='font-size:12px;color:#6b7280;'>abstention non utilisée pour la couleur</span>")
return "<div style='margin-bottom:8px;'>" + " ".join(parts) + "</div>"
def build_bureau_map_html(
backend: "PredictorBackend",
target_type: str,
target_year: int,
) -> str:
try:
import folium
except Exception:
return "<p>Folium n'est pas disponible. Installe-le via requirements.txt.</p>"
features = load_geojson_features()
if not features:
return "<p>Aucune geojson trouvée dans data/geo.</p>"
bounds = geojson_bounds(features)
if bounds is None:
return "<p>Impossible de calculer l'emprise de la carte.</p>"
(min_lat, min_lon), (max_lat, max_lon) = bounds
center = [(min_lat + max_lat) / 2, (min_lon + max_lon) / 2]
fmap = folium.Map(location=center, zoom_start=13, tiles="cartodbpositron")
available_codes = set(backend.available_bureaux())
for feature in features:
props = feature.get("properties", {})
label = props.get("name") or "Bureau"
bureau_num = extract_bureau_number(label)
if bureau_num is None:
code_bv = None
else:
code_bv = match_bureau_code(COMMUNE_CODE_SETE, bureau_num, available_codes)
fill_color = "#9ca3af"
popup_html = None
if code_bv is not None:
details, _, meta = backend.predict_bureau_details(code_bv, target_type, target_year)
if details is not None:
shares = details["shares_by_cat"]
left_share = float(shares.get("gauche_dure", 0.0) + shares.get("gauche_modere", 0.0))
right_share = float(shares.get("droite_dure", 0.0) + shares.get("droite_modere", 0.0))
if abs(left_share - right_share) <= NEUTRAL_MARGIN_SHARE:
fill_color = "#ffffff"
else:
winner = max(shares, key=shares.get)
fill_color = DISPLAY_CATEGORY_COLORS.get(winner, fill_color)
table_df = build_prediction_table_from_counts(details["counts"])
chart_b64 = chart_base64_from_df(
table_df,
value_col="nombre",
ylabel="Nombre d'electeurs",
color_map=DISPLAY_LABEL_COLORS,
)
popup_html = build_map_popup_html(str(label), table_df, chart_b64, meta)
def _style(_: dict, color=fill_color):
return {
"fillColor": color,
"color": "#111827",
"weight": 1,
"fillOpacity": 0.6,
}
geo = folium.GeoJson(feature, style_function=_style)
if popup_html:
geo.add_child(folium.Popup(popup_html, max_width=450))
geo.add_child(folium.Tooltip(str(label)))
geo.add_to(fmap)
fmap.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]])
return fmap._repr_html_()
def _project_rate(
series: pd.Series,
years: pd.Series,
target_year: int,
*,
min_points_trend: int = 3,
clamp_to_observed: bool = True,
) -> float | None:
df = pd.DataFrame(
{
"value": pd.to_numeric(series, errors="coerce"),
"year": pd.to_numeric(years, errors="coerce"),
}
).dropna()
if df.empty:
return None
values = df["value"].to_numpy()
years_arr = df["year"].to_numpy()
if len(set(years_arr)) >= min_points_trend and len(df) >= min_points_trend:
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=NP_RANK_WARNING)
try:
slope, intercept = np.polyfit(years_arr, values, 1)
projected = slope * target_year + intercept
except Exception:
projected = values[-1]
else:
projected = values[-1]
if clamp_to_observed and len(values):
projected = min(max(projected, float(np.nanmin(values))), float(np.nanmax(values)))
return float(min(1.0, max(0.0, projected)))
def _allocate_counts(shares: np.ndarray, total: int) -> np.ndarray:
if total <= 0 or shares.size == 0:
return np.zeros_like(shares, dtype=int)
shares = np.clip(shares, 0, None)
if shares.sum() == 0:
return np.zeros_like(shares, dtype=int)
shares = shares / shares.sum()
raw = shares * total
floors = np.floor(raw)
remainder = int(total - floors.sum())
if remainder > 0:
order = np.argsort(-(raw - floors))
for idx in order[:remainder]:
floors[idx] += 1
return floors.astype(int)
def load_bureau_event_stats(commune_code: str) -> pd.DataFrame:
candidates = [
Path("data/processed/elections_blocs.parquet"),
Path("data/processed/elections_blocs.csv"),
Path("data/interim/elections_long.parquet"),
Path("data/interim/elections_long.csv"),
]
df = pd.DataFrame()
best = pd.DataFrame()
for path in candidates:
if not path.exists():
continue
if path.suffix == ".parquet":
df = pd.read_parquet(path)
else:
df = pd.read_csv(path, sep=";")
if df.empty:
continue
if "type_scrutin" not in df.columns and "election_type" in df.columns:
df["type_scrutin"] = df["election_type"]
if "annee" not in df.columns and "election_year" in df.columns:
df["annee"] = df["election_year"]
if "tour" not in df.columns and "round" in df.columns:
df["tour"] = df["round"]
df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce")
for col in ["inscrits", "votants", "blancs", "nuls"]:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors="coerce")
else:
df[col] = np.nan
if "code_commune" in df.columns:
df["code_commune"] = df["code_commune"].astype(str)
df = df[df["code_commune"] == str(commune_code)]
else:
df = df[df["code_bv"].astype(str).str.startswith(str(commune_code))]
df = df.dropna(subset=["code_bv"])
if df.empty:
continue
has_blancs = df["blancs"].notna().any() or df["nuls"].notna().any()
if has_blancs:
best = df
break
if best.empty:
best = df
df = best
if df.empty:
return df
group_cols = [col for col in ["code_bv", "type_scrutin", "annee", "tour", "date_scrutin"] if col in df.columns]
agg = df.groupby(group_cols, as_index=False).agg(
inscrits=("inscrits", "max"),
votants=("votants", "max"),
blancs=("blancs", "max"),
nuls=("nuls", "max"),
)
if "date_scrutin" in agg.columns:
agg = agg.sort_values("date_scrutin")
agg["election_type"] = agg.get("type_scrutin")
agg["election_type"] = agg["election_type"].astype("string").str.strip().str.lower()
agg["election_year"] = pd.to_numeric(agg.get("annee"), errors="coerce")
agg["round"] = pd.to_numeric(agg.get("tour"), errors="coerce").fillna(1).astype(int)
base_inscrits = agg["inscrits"].replace(0, np.nan)
agg["turnout_pct"] = agg["votants"] / base_inscrits
agg["blancs_pct"] = agg["blancs"] / base_inscrits
agg["nuls_pct"] = agg["nuls"] / base_inscrits
return agg[
[
"code_bv",
"election_type",
"election_year",
"round",
"date_scrutin",
"inscrits",
"votants",
"blancs",
"nuls",
"turnout_pct",
"blancs_pct",
"nuls_pct",
]
]
def load_commune_event_stats(commune_code: str) -> pd.DataFrame:
candidates = [
Path("data/processed/commune_event_stats.parquet"),
Path("data/processed/commune_event_stats.csv"),
]
df = pd.DataFrame()
for path in candidates:
if not path.exists():
continue
if path.suffix == ".parquet":
df = pd.read_parquet(path)
else:
df = pd.read_csv(path, sep=";")
if not df.empty:
break
if df.empty:
return df
if "type_scrutin" not in df.columns and "election_type" in df.columns:
df["type_scrutin"] = df["election_type"]
if "annee" not in df.columns and "election_year" in df.columns:
df["annee"] = df["election_year"]
if "tour" not in df.columns and "round" in df.columns:
df["tour"] = df["round"]
df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce")
for col in ["inscrits", "votants", "blancs", "nuls"]:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors="coerce")
else:
df[col] = np.nan
if "code_commune" in df.columns:
df["code_commune"] = df["code_commune"].astype(str)
df = df[df["code_commune"] == str(commune_code)]
else:
return pd.DataFrame()
if df.empty:
return df
base_inscrits = df["inscrits"].replace(0, np.nan)
if "turnout_pct" not in df.columns:
df["turnout_pct"] = df["votants"] / base_inscrits
if "blancs_pct" not in df.columns:
df["blancs_pct"] = df["blancs"] / base_inscrits
if "nuls_pct" not in df.columns:
df["nuls_pct"] = df["nuls"] / base_inscrits
df["election_type"] = df["type_scrutin"].astype("string").str.strip().str.lower()
df["election_year"] = pd.to_numeric(df.get("annee"), errors="coerce")
df["round"] = pd.to_numeric(df.get("tour"), errors="coerce").fillna(1).astype(int)
return df[
[
"code_commune",
"election_type",
"election_year",
"round",
"date_scrutin",
"inscrits",
"votants",
"blancs",
"nuls",
"turnout_pct",
"blancs_pct",
"nuls_pct",
]
]
def format_backend_label(backend_kind: str) -> str:
return "PostgreSQL" if backend_kind == "postgres" else "fichiers locaux"
def format_election_type_label(election_type: str) -> str:
label = ELECTION_TYPE_LABELS.get(election_type)
if label:
return label
return str(election_type).replace("_", " ").title()
def format_election_label(
election_type: str,
election_year: int,
round_num: int,
date_scrutin: pd.Timestamp | None = None,
) -> str:
base = f"{format_election_type_label(election_type)} {election_year} - Tour {round_num}"
if date_scrutin is None or pd.isna(date_scrutin):
return base
date_value = pd.to_datetime(date_scrutin).date().isoformat()
return f"{base} ({date_value})"
def format_election_key(election_type: str, election_year: int, round_num: int) -> str:
return f"{election_type}{ELECTION_KEY_SEP}{election_year}{ELECTION_KEY_SEP}{round_num}"
def parse_election_key(key: str) -> Tuple[str, int, int]:
parts = key.split(ELECTION_KEY_SEP)
if len(parts) != 3:
raise ValueError(f"Clé d'élection invalide: {key!r}")
return parts[0], int(parts[1]), int(parts[2])
def format_bureau_label(code_bv: str, bureau_label: str | None) -> str:
code = str(code_bv)
suffix = code.split("-")[-1] if "-" in code else code
if bureau_label is not None and not pd.isna(bureau_label):
label = str(bureau_label).strip()
if label and label != code:
return f"{label} ({code})"
return f"Bureau {suffix} ({code})"
def build_bureau_choices(history: pd.DataFrame) -> list[tuple[str, str]]:
if history.empty:
return []
if "bureau_label" in history.columns:
label_map = (
history[["code_bv", "bureau_label"]]
.dropna(subset=["code_bv"])
.drop_duplicates()
.sort_values("code_bv")
.groupby("code_bv", as_index=False)["bureau_label"]
.first()
)
return [
(format_bureau_label(row.code_bv, row.bureau_label), row.code_bv)
for row in label_map.itertuples(index=False)
]
codes = sorted(history["code_bv"].dropna().unique().tolist())
return [(format_bureau_label(code, None), code) for code in codes]
def build_history_choices(history: pd.DataFrame) -> list[tuple[str, str]]:
if history.empty:
return []
events = (
history[["election_type", "election_year", "round", "date_scrutin"]]
.dropna(subset=["election_type", "election_year", "round"])
.drop_duplicates()
.groupby(["election_type", "election_year", "round"], as_index=False)
.agg(date_scrutin=("date_scrutin", "min"))
.sort_values(["election_year", "election_type", "round"])
)
return [
(
format_election_label(
row.election_type,
int(row.election_year),
int(row.round),
row.date_scrutin,
),
format_election_key(row.election_type, int(row.election_year), int(row.round)),
)
for row in events.itertuples(index=False)
]
def clean_history_frame(history: pd.DataFrame) -> pd.DataFrame:
if history.empty:
return history
clean = history.copy()
clean["code_bv"] = clean["code_bv"].astype("string").str.strip()
clean["election_type"] = clean["election_type"].astype("string").str.strip().str.lower()
clean["category"] = clean["category"].astype("string").str.strip().str.lower()
if "bureau_label" in clean.columns:
clean["bureau_label"] = clean["bureau_label"].astype("string").str.strip()
clean["election_year"] = pd.to_numeric(clean["election_year"], errors="coerce")
clean["round"] = pd.to_numeric(clean["round"], errors="coerce").fillna(1)
clean["date_scrutin"] = pd.to_datetime(clean["date_scrutin"], errors="coerce")
for col in ["share", "share_nat", "turnout_pct"]:
if col in clean.columns:
clean[col] = pd.to_numeric(clean[col], errors="coerce").clip(lower=0, upper=1)
clean = clean.dropna(subset=["code_bv", "election_type", "election_year", "round", "category"])
clean["election_year"] = clean["election_year"].astype(int)
clean["round"] = clean["round"].astype(int)
clean = clean[clean["category"].isin(CANDIDATE_CATEGORIES)]
return clean
def prepare_history_table(history_slice: pd.DataFrame) -> pd.DataFrame:
if history_slice.empty:
return pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
grouped = history_slice.groupby("category", as_index=False).agg(share=("share", "sum"))
clean = pd.DataFrame({"category": ordered_categories()}).merge(grouped, on="category", how="left")
clean["share"] = pd.to_numeric(clean["share"], errors="coerce").fillna(0).clip(lower=0, upper=1)
clean["score_%"] = (clean["share"] * 100).round(1)
clean["categorie"] = clean["category"].map(DISPLAY_CATEGORY_LABELS).fillna(clean["category"])
return clean[HISTORY_OUTPUT_COLUMNS]
def format_history_meta(history_slice: pd.DataFrame) -> str:
if history_slice.empty:
return ""
parts = []
dates = history_slice["date_scrutin"].dropna()
if not dates.empty:
date_value = pd.to_datetime(dates.iloc[0]).date().isoformat()
parts.append(f"Date du scrutin : {date_value}")
turnout_vals = pd.to_numeric(history_slice["turnout_pct"], errors="coerce").dropna()
if not turnout_vals.empty:
parts.append(f"Participation : {turnout_vals.iloc[0] * 100:.1f}%")
return " | ".join(parts)
def _code_bv_full(commune_code: str, bureau_code: str) -> str:
bureau_code = str(bureau_code).zfill(4)
return f"{commune_code}-{bureau_code}"
def load_history_from_db(commune_code: str) -> pd.DataFrame:
engine = get_engine()
query = sa.text(
"""
select cm.insee_code as commune_code,
b.bureau_code,
b.bureau_label,
e.election_type,
e.election_year,
coalesce(e.round, 1) as round,
e.date as date_scrutin,
c.name as category,
rl.share_pct,
rl.turnout_pct,
rn.share_pct as share_nat
from results_local rl
join bureaux b on rl.bureau_id = b.id
join communes cm on b.commune_id = cm.id
join elections e on rl.election_id = e.id
join categories c on rl.category_id = c.id
left join results_national rn on rn.election_id = e.id and rn.category_id = rl.category_id
where cm.insee_code = :commune
"""
)
df = pd.read_sql(query, engine, params={"commune": commune_code})
if df.empty:
raise RuntimeError("Aucune donnée dans la base pour la commune demandée.")
df["code_bv"] = df.apply(lambda r: _code_bv_full(r["commune_code"], r["bureau_code"]), axis=1)
df["date_scrutin"] = pd.to_datetime(df["date_scrutin"])
df["share"] = pd.to_numeric(df["share_pct"], errors="coerce") / 100
df["share_nat"] = pd.to_numeric(df["share_nat"], errors="coerce") / 100
df["turnout_pct"] = pd.to_numeric(df["turnout_pct"], errors="coerce") / 100
df["election_year"] = pd.to_numeric(df["election_year"], errors="coerce")
df["round"] = pd.to_numeric(df["round"], errors="coerce").fillna(1).astype(int)
return df[
[
"commune_code",
"code_bv",
"bureau_label",
"election_type",
"election_year",
"round",
"date_scrutin",
"category",
"share",
"share_nat",
"turnout_pct",
]
]
def load_history_from_files(commune_code: str) -> pd.DataFrame:
elections_long_all = load_elections_long(
Path("data/interim/elections_long.parquet"),
commune_code=commune_code,
)
mapping = load_mapping(Path("data/mapping_candidats_blocs.csv"))
expanded_all = expand_by_category(elections_long_all, mapping)
local_all = aggregate_by_event(expanded_all)
nat = compute_national_reference(local_all)
local = local_all[local_all["commune_code"] == commune_code].copy()
local = local.merge(nat, on=["election_type", "election_year", "round", "category"], how="left")
# Columns already in aggregate_by_event/compute_national_reference
if "share" not in local.columns:
raise RuntimeError("Colonne share absente du dataset local (fallback fichiers).")
local["bureau_label"] = None
local["share_nat"] = local.get("share_nat")
local["turnout_pct"] = local.get("turnout_pct")
return local.rename(
columns={
"annee": "election_year",
"tour": "round",
}
)[
[
"commune_code",
"code_bv",
"bureau_label",
"election_type",
"election_year",
"round",
"date_scrutin",
"category",
"share",
"share_nat",
"turnout_pct",
]
]
def references_from_history(history: pd.DataFrame, target_year: int) -> Dict[str, Dict[Tuple[str, str], float]]:
hist = history[history["election_year"] < target_year].copy()
leg = (
hist[hist["election_type"] == "legislatives"]
.sort_values("date_scrutin")
.groupby(["code_bv", "category"])["share"]
.last()
)
mun2020 = (
hist[(hist["election_type"] == "municipales") & (hist["election_year"] == 2020)]
.sort_values("date_scrutin")
.groupby(["code_bv", "category"])["share"]
.last()
)
return {"leg": leg.to_dict(), "mun2020": mun2020.to_dict()}
def build_features_from_history(history: pd.DataFrame, target_type: str, target_year: int) -> pd.DataFrame:
hist = history[history["election_year"] < target_year].copy()
if hist.empty:
return pd.DataFrame()
hist = hist.sort_values("date_scrutin")
hist["dev_to_nat"] = hist["share"] - hist["share_nat"]
last_any_share = hist.groupby(["code_bv", "category"])["share"].last()
last_any_dev = hist.groupby(["code_bv", "category"])["dev_to_nat"].last()
last_type_share = (
hist[hist["election_type"] == target_type]
.groupby(["code_bv", "category"])["share"]
.last()
)
last_type_dev = (
hist[hist["election_type"] == target_type]
.groupby(["code_bv", "category"])["dev_to_nat"]
.last()
)
swing_any = (
hist.groupby(["code_bv", "category"])["share"]
.apply(lambda s: s.iloc[-1] - s.iloc[-2] if len(s) >= 2 else np.nan)
.rename("swing_any")
)
turnout_any = hist.groupby("code_bv")["turnout_pct"].last()
turnout_type = (
hist[hist["election_type"] == target_type]
.groupby("code_bv")["turnout_pct"]
.last()
)
bureaux = sorted(hist["code_bv"].dropna().unique())
records = []
for code_bv in bureaux:
record = {
"commune_code": str(code_bv).split("-")[0],
"code_bv": code_bv,
"election_type": target_type,
"election_year": target_year,
"round": 1,
"date_scrutin": f"{target_year}-01-01",
"prev_turnout_any_lag1": turnout_any.get(code_bv, np.nan),
"prev_turnout_same_type_lag1": turnout_type.get(code_bv, np.nan),
}
for cat in CANDIDATE_CATEGORIES:
record[f"prev_share_any_lag1_{cat}"] = last_any_share.get((code_bv, cat), np.nan)
record[f"prev_share_type_lag1_{cat}"] = last_type_share.get((code_bv, cat), np.nan)
record[f"prev_dev_to_national_any_lag1_{cat}"] = last_any_dev.get((code_bv, cat), np.nan)
record[f"prev_dev_to_national_type_lag1_{cat}"] = last_type_dev.get((code_bv, cat), np.nan)
record[f"swing_any_{cat}"] = swing_any.get((code_bv, cat), np.nan)
records.append(record)
return pd.DataFrame.from_records(records)
def load_model() -> Path:
best_path = MODEL_DIR / "best_model.json"
if best_path.exists():
try:
payload = json.loads(best_path.read_text())
name = payload.get("name")
if name:
candidate = MODEL_DIR / f"{name}.joblib"
if candidate.exists():
return candidate
except Exception:
pass
if (MODEL_DIR / "hist_gradient_boosting.joblib").exists():
return MODEL_DIR / "hist_gradient_boosting.joblib"
joblibs = sorted(MODEL_DIR.glob("*.joblib"))
if not joblibs:
raise FileNotFoundError("Aucun modèle trouvé dans models/. Lancez src/model/train.py.")
return joblibs[0]
def load_feature_columns(path: Path, df: pd.DataFrame) -> list[str]:
if path.exists():
return json.loads(path.read_text())
exclude = {"commune_code", "code_bv", "election_type", "election_year", "round", "date_scrutin"}
return [c for c in df.columns if c not in exclude]
def format_delta(value) -> str:
if value is None or (isinstance(value, float) and np.isnan(value)):
return "N/A"
sign = "+" if value >= 0 else ""
return f"{sign}{round(value, 1)}"
class PredictorBackend:
def __init__(self, commune_code: str = COMMUNE_CODE_SETE):
self.commune_code = commune_code
self.backend = "local"
try:
self.history = load_history_from_db(commune_code)
self.backend = "postgres"
LOGGER.info("Backend PostgreSQL chargé (%s lignes)", len(self.history))
except Exception as exc:
LOGGER.warning("PostgreSQL indisponible (%s) -> fallback fichiers.", exc)
self.history = load_history_from_files(commune_code)
self.backend = "files"
LOGGER.info("Backend fichiers chargé (%s lignes)", len(self.history))
self.history = clean_history_frame(self.history)
self.event_stats = load_bureau_event_stats(commune_code)
self.commune_stats = load_commune_event_stats(commune_code)
self.default_rates = {}
self.default_rates_by_type: dict[str, dict[str, float]] = {}
stats = self.commune_stats if not self.commune_stats.empty else self.event_stats
if not stats.empty:
if "round" in stats.columns:
round1 = stats[stats["round"] == 1]
if not round1.empty:
stats = round1
self.default_rates = {
"turnout_pct": float(stats["turnout_pct"].median(skipna=True)),
"blancs_pct": float(stats["blancs_pct"].median(skipna=True)),
"nuls_pct": float(stats["nuls_pct"].median(skipna=True)),
}
if "election_type" in stats.columns:
for etype, group in stats.groupby("election_type"):
self.default_rates_by_type[str(etype)] = {
"turnout_pct": float(group["turnout_pct"].median(skipna=True)),
"blancs_pct": float(group["blancs_pct"].median(skipna=True)),
"nuls_pct": float(group["nuls_pct"].median(skipna=True)),
}
self.model_path = load_model()
self.model = joblib.load(self.model_path)
# feature cache per target
self.refs_cache: Dict[Tuple[str, int], Dict[str, Dict[Tuple[str, str], float]]] = {}
def available_bureaux(self) -> list[str]:
return sorted(self.history["code_bv"].dropna().unique().tolist())
def available_targets(self) -> list[Tuple[str, int]]:
existing = set()
for row in self.history.itertuples(index=False):
try:
year = int(row.election_year) # type: ignore
except Exception:
continue
existing.add((row.election_type, year))
for t in DEFAULT_TARGETS:
existing.add(t)
return sorted(existing, key=lambda x: (x[1], x[0]))
def _get_features_and_refs(self, target_type: str, target_year: int) -> Tuple[pd.DataFrame, Dict[str, Dict[Tuple[str, str], float]]]:
key = (target_type, target_year)
if key not in FEATURE_CACHE:
feature_df = build_features_from_history(self.history, target_type, target_year)
refs = references_from_history(self.history, target_year)
FEATURE_CACHE[key] = (feature_df, refs)
return FEATURE_CACHE[key]
def predict_bureau_details(
self,
code_bv: str,
target_type: str,
target_year: int,
inscrits_override: float | None = None,
share_overrides: Dict[str, float] | None = None,
abstention_override_pct: float | None = None,
blancs_override_pct: float | None = None,
nuls_override_pct: float | None = None,
) -> Tuple[Dict[str, object] | None, str, str]:
feature_df, _ = self._get_features_and_refs(target_type, target_year)
if feature_df.empty:
return None, "Données insuffisantes", ""
row = feature_df[feature_df["code_bv"] == code_bv].copy()
if row.empty:
return None, "Bureau non trouvé dans l'historique.", ""
feature_cols = load_feature_columns(FEATURE_COLS_PATH, feature_df)
missing = [c for c in feature_cols if c not in row.columns]
for col in missing:
row[col] = np.nan
preds = self.model.predict(row[feature_cols])
preds = np.clip(preds, 0, 1)
sums = preds.sum(axis=1, keepdims=True)
sums[sums == 0] = 1
preds = preds / sums
preds_share = preds.flatten()
preds_by_cat = {cat: float(preds_share[idx]) for idx, cat in enumerate(CANDIDATE_CATEGORIES)}
preds_by_cat = blend_with_type_history(preds_by_cat, row.iloc[0], target_type)
ordered = ordered_categories()
preds_by_cat = apply_share_overrides(preds_by_cat, share_overrides, ordered)
share_vec = np.array([preds_by_cat.get(cat, 0.0) for cat in ordered], dtype=float)
stats = self.event_stats[self.event_stats["code_bv"] == code_bv].sort_values("date_scrutin")
inscrits_used = None
if inscrits_override is not None:
try:
value = float(inscrits_override)
if value > 0:
inscrits_used = value
except (TypeError, ValueError):
inscrits_used = None
if inscrits_used is None and not stats.empty:
serie = pd.to_numeric(stats["inscrits"], errors="coerce").dropna()
if not serie.empty:
inscrits_used = float(serie.iloc[-1])
if inscrits_used is None:
return None, "Inscrits indisponibles pour ce bureau.", ""
def pick_rate(col: str) -> float:
default = self.default_rates.get(col, 0.0)
default = 0.0 if default is None or np.isnan(default) else float(default)
type_default = self.default_rates_by_type.get(target_type, {}).get(col)
if type_default is None or np.isnan(type_default):
type_default = default
bureau_scoped = self.event_stats
if not bureau_scoped.empty and "round" in bureau_scoped.columns:
round1 = bureau_scoped[bureau_scoped["round"] == 1]
if not round1.empty:
bureau_scoped = round1
series = None
years = None
if (
not bureau_scoped.empty
and col in bureau_scoped.columns
and "election_type" in bureau_scoped.columns
):
if target_type in bureau_scoped["election_type"].values:
mask = bureau_scoped["election_type"] == target_type
series = bureau_scoped.loc[mask, col]
years = bureau_scoped.loc[mask, "election_year"]
if series is None and not self.commune_stats.empty and col in self.commune_stats.columns:
commune_scoped = self.commune_stats
if "round" in commune_scoped.columns:
round1 = commune_scoped[commune_scoped["round"] == 1]
if not round1.empty:
commune_scoped = round1
if target_type in commune_scoped["election_type"].values:
mask = commune_scoped["election_type"] == target_type
series = commune_scoped.loc[mask, col]
years = commune_scoped.loc[mask, "election_year"]
else:
series = commune_scoped[col]
years = commune_scoped["election_year"]
if series is None:
if bureau_scoped.empty or col not in bureau_scoped.columns:
return type_default
series = bureau_scoped[col]
years = bureau_scoped["election_year"]
rate = _project_rate(series, years, target_year)
if rate is None or np.isnan(rate):
return type_default
return float(rate)
turnout_rate = pick_rate("turnout_pct")
blancs_rate = pick_rate("blancs_pct")
nuls_rate = pick_rate("nuls_pct")
abstention_override = _normalize_override_pct(abstention_override_pct)
if abstention_override is not None:
turnout_rate = float(np.clip(1.0 - (abstention_override / 100.0), 0.0, 1.0))
blancs_override = _normalize_override_pct(blancs_override_pct)
if blancs_override is not None:
blancs_rate = float(blancs_override / 100.0)
nuls_override = _normalize_override_pct(nuls_override_pct)
if nuls_override is not None:
nuls_rate = float(nuls_override / 100.0)
if blancs_rate + nuls_rate > turnout_rate and (blancs_rate + nuls_rate) > 0:
scale = turnout_rate / (blancs_rate + nuls_rate)
blancs_rate *= scale
nuls_rate *= scale
inscrits_total = int(round(inscrits_used))
votants_total = int(round(inscrits_total * turnout_rate))
blancs_total = int(round(inscrits_total * blancs_rate))
nuls_total = int(round(inscrits_total * nuls_rate))
if blancs_total + nuls_total > votants_total and (blancs_total + nuls_total) > 0:
scale = votants_total / (blancs_total + nuls_total)
blancs_total = int(round(blancs_total * scale))
nuls_total = int(round(nuls_total * scale))
exprimes_total = max(0, votants_total - blancs_total - nuls_total)
abstention_total = max(0, inscrits_total - votants_total)
bloc_counts = _allocate_counts(share_vec, exprimes_total)
counts_by_cat = {cat: int(count) for cat, count in zip(ordered, bloc_counts)}
counts_by_cat.update(
{
"blancs": int(blancs_total),
"nuls": int(nuls_total),
"abstention": int(abstention_total),
}
)
backend_label = format_backend_label(self.backend)
meta = (
f"Inscrits utilisés : {inscrits_total} | Votants : {votants_total} | "
f"Blancs : {blancs_total} | Nuls : {nuls_total} | Abstentions : {abstention_total}"
)
details = {
"shares_by_cat": preds_by_cat,
"share_vec": share_vec,
"ordered": ordered,
"counts": counts_by_cat,
"totals": {
"inscrits": inscrits_total,
"votants": votants_total,
"blancs": blancs_total,
"nuls": nuls_total,
"abstention": abstention_total,
"exprimes": exprimes_total,
},
}
return details, backend_label, meta
def predict_bureau(
self,
code_bv: str,
target_type: str,
target_year: int,
inscrits_override: float | None = None,
share_overrides: Dict[str, float] | None = None,
abstention_override_pct: float | None = None,
blancs_override_pct: float | None = None,
nuls_override_pct: float | None = None,
) -> Tuple[pd.DataFrame, str, str]:
details, backend_label, meta = self.predict_bureau_details(
code_bv,
target_type,
target_year,
inscrits_override,
share_overrides,
abstention_override_pct,
blancs_override_pct,
nuls_override_pct,
)
if details is None:
return pd.DataFrame(), backend_label, ""
counts_by_cat = details["counts"]
ordered = details["ordered"]
rows = []
for cat in ordered:
rows.append(
{
"categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat),
"nombre": int(counts_by_cat.get(cat, 0)),
}
)
for extra in ["blancs", "nuls", "abstention"]:
rows.append(
{
"categorie": DISPLAY_CATEGORY_LABELS[extra],
"nombre": int(counts_by_cat.get(extra, 0)),
}
)
return pd.DataFrame(rows), backend_label, meta
def build_bar_chart(
df: pd.DataFrame,
value_col: str,
*,
color: str = "#3b82f6",
color_map: Dict[str, str] | None = None,
category_col: str = "categorie",
ylabel: str = "Score (%)",
):
try:
import matplotlib.pyplot as plt
except Exception:
return None
if df.empty or value_col not in df.columns:
return None
plt.figure(figsize=(6, 3))
labels = df[category_col].astype(str).tolist() if category_col in df.columns else []
if color_map:
colors = [color_map.get(label, color) for label in labels]
else:
colors = color
plt.bar(labels, df[value_col], color=colors)
plt.xticks(rotation=30, ha="right")
plt.ylabel(ylabel)
plt.tight_layout()
return plt
def create_interface() -> gr.Blocks:
backend = PredictorBackend()
bureau_choices = build_bureau_choices(backend.history)
bureau_labels = [label for label, _ in bureau_choices]
bureau_map = {label: value for label, value in bureau_choices}
bureau_label_by_code = {value: label for label, value in bureau_choices}
targets = backend.available_targets()
target_labels = [f"{t} {y}" for t, y in targets]
history_choices = build_history_choices(backend.history)
history_labels = [label for label, _ in history_choices]
history_map = {label: value for label, value in history_choices}
if ("municipales", 2026) in targets:
default_target = "municipales 2026"
elif targets:
default_target = f"{targets[-1][0]} {targets[-1][1]}"
else:
default_target = "municipales 2026"
default_bv = bureau_labels[0] if bureau_labels else None
default_history = history_labels[-1] if history_labels else None
backend_label = format_backend_label(backend.backend)
residual_payload = load_residual_intervals()
residuals = residual_payload.get("residuals", {}) if isinstance(residual_payload, dict) else {}
residual_model = residual_payload.get("model", "inconnu") if isinstance(residual_payload, dict) else "inconnu"
interval_choices = list(INTERVAL_BANDS.keys()) or ["80% (p10-p90)"]
interval_default = interval_choices[0]
bloc_labels = [DISPLAY_CATEGORY_LABELS.get(cat, cat) for cat in ordered_categories()]
with gr.Blocks(title="Prévision Municipales — Ville de Sète") as demo:
gr.Markdown(
"""
# Prévision Municipales — Ville de Sète
Choisissez un bureau de vote et une élection cible.
Le modèle estime un volume par catégorie politique, ainsi que les abstentions, blancs et nuls.
Auteur : [Stéphane Manet](https://manet-conseil.fr) - [Linkedin](https://www.linkedin.com/in/stephanemanet) - [GitHub](https://github.com/stephmnt)
"""
)
with gr.Tabs():
with gr.Tab("Prévisions"):
with gr.Row():
bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
target_dd = gr.Dropdown(choices=target_labels, value=default_target, label="Élection cible (type année)")
inscrits_in = gr.Number(value=None, label="Inscrits (optionnel)", precision=0)
override_inputs: Dict[str, gr.Number] = {}
with gr.Accordion("Imputation manuelle (optionnel)", open=False):
gr.Markdown("Abstention / blancs / nuls en % des inscrits.")
with gr.Row():
abstention_in = gr.Number(value=40, label="Abstention (% inscrits)", precision=1)
blancs_in = gr.Number(value=None, label="Blancs (% inscrits)", precision=1)
nuls_in = gr.Number(value=None, label="Nuls (% inscrits)", precision=1)
gr.Markdown("Nuances politiques en % des exprimés (laisser vide pour garder le modèle).")
cats = ordered_categories()
with gr.Row():
for cat in cats[:4]:
override_inputs[cat] = gr.Number(
value=None,
label=DISPLAY_CATEGORY_LABELS.get(cat, cat),
precision=1,
)
with gr.Row():
for cat in cats[4:]:
override_inputs[cat] = gr.Number(
value=None,
label=DISPLAY_CATEGORY_LABELS.get(cat, cat),
precision=1,
)
predict_btn = gr.Button("Prédire")
source_box = gr.Markdown(value=f"Source des données : {backend_label}")
output_df = gr.Dataframe(
headers=PREDICTION_OUTPUT_COLUMNS,
label="Prédictions (nombres)",
)
chart = gr.Plot()
with gr.Tab("Historique"):
gr.Markdown(
"""
Consultation des résultats passés (sans machine learning).
Sélectionnez un bureau et une élection pour afficher l'histogramme des parts par tendance politique.
"""
)
with gr.Row():
history_bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
history_election_dd = gr.Dropdown(
choices=history_labels,
value=default_history,
label="Élection (type année tour)",
)
history_btn = gr.Button("Afficher l'historique")
history_source = gr.Markdown(value=f"Source des données : {backend_label}")
history_df = gr.Dataframe(headers=HISTORY_OUTPUT_COLUMNS, label="Résultats historiques")
history_chart = gr.Plot()
history_meta = gr.Markdown()
with gr.Tab("Carte"):
gr.Markdown(
"""
Carte des bureaux de vote de Sète.
Cliquez sur un polygone pour afficher la prédiction (table + graphique).
"""
)
map_legend = gr.HTML(value=build_map_legend_html())
with gr.Row():
map_target_dd = gr.Dropdown(
choices=target_labels,
value=default_target,
label="Élection cible (type année)",
)
map_btn = gr.Button("Afficher la carte")
map_html = gr.HTML(value="<p>Cliquez sur 'Afficher la carte' pour charger la carte.</p>")
with gr.Tab("Stratégie"):
gr.Markdown(
"""
Analyse stratégique par bureau : intervalles d'incertitude issus des résidus CV,
puis simulateur de transferts pour estimer des bascules potentielles.
"""
)
with gr.Row():
strategy_bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
strategy_target_dd = gr.Dropdown(
choices=target_labels,
value=default_target,
label="Élection cible (type année)",
)
strategy_inscrits_in = gr.Number(value=None, label="Inscrits (optionnel)", precision=0)
interval_dd = gr.Dropdown(
choices=interval_choices,
value=interval_default,
label="Intervalle CV",
)
strategy_btn = gr.Button("Analyser l'incertitude")
interval_source = gr.Markdown(
value=(
f"Intervalle CV basé sur le modèle : {residual_model}"
if residuals
else "Intervalle CV indisponible (fallback ±3%)."
)
)
interval_df = gr.Dataframe(
headers=INTERVAL_OUTPUT_COLUMNS,
label="Plage empirique par bloc",
)
interval_chart = gr.Plot()
gr.Markdown("### Simulateur de transferts (points d'inscrits)")
with gr.Row():
target_bloc_dd = gr.Dropdown(choices=bloc_labels, value=bloc_labels[0] if bloc_labels else None, label="Bloc cible")
with gr.Row():
source_1_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["abstention"], label="Source 1")
target_1_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["droite_dure"], label="Cible 1")
delta_1 = gr.Slider(minimum=0, maximum=10, value=3, step=0.1, label="Delta 1 (points %)")
with gr.Row():
source_2_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["droite_modere"], label="Source 2")
target_2_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["gauche_modere"], label="Cible 2")
delta_2 = gr.Slider(minimum=0, maximum=10, value=3, step=0.1, label="Delta 2 (points %)")
simulate_btn = gr.Button("Simuler les transferts")
sim_df = gr.Dataframe(headers=SIM_OUTPUT_COLUMNS, label="Simulation par catégorie")
sim_chart = gr.Plot()
opportunity_df = gr.Dataframe(headers=OPPORTUNITY_OUTPUT_COLUMNS, label="Bureaux à potentiel (trié)")
def _predict(
bv_label: str,
target_label: str,
inscrits_override: float | None,
abstention_override: float | None,
blancs_override: float | None,
nuls_override: float | None,
*cat_overrides: float,
):
if not bv_label or not target_label:
return pd.DataFrame(), "Entrée invalide", None
code_bv = bureau_map.get(bv_label)
if not code_bv:
return pd.DataFrame(), "Bureau invalide", None
try:
parts = target_label.split()
target_type, target_year = parts[0].lower(), int(parts[1])
except Exception:
target_type, target_year = "municipales", 2026
share_overrides: Dict[str, float] = {}
for cat, value in zip(ordered_categories(), cat_overrides):
norm = _normalize_override_pct(value)
if norm is None:
continue
share_overrides[cat] = norm
df, backend_label, meta = backend.predict_bureau(
code_bv,
target_type,
target_year,
inscrits_override,
share_overrides=share_overrides if share_overrides else None,
abstention_override_pct=abstention_override,
blancs_override_pct=blancs_override,
nuls_override_pct=nuls_override,
)
plot = build_bar_chart(
df,
value_col="nombre",
ylabel="Nombre d'électeurs",
color_map=DISPLAY_LABEL_COLORS,
)
meta_label = f" | {meta}" if meta else ""
return df, f"Source des données : {backend_label}{meta_label}", plot
def _parse_target_label(target_label: str) -> Tuple[str, int]:
try:
parts = target_label.split()
return parts[0].lower(), int(parts[1])
except Exception:
return "municipales", 2026
def _map(target_label: str):
if not target_label:
return "<p>Élection invalide.</p>"
target_type, target_year = _parse_target_label(target_label)
return build_bureau_map_html(backend, target_type, target_year)
def _history(bv_label: str, election_label: str):
if not bv_label or not election_label:
empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
return empty, "Entrée invalide", None, ""
code_bv = bureau_map.get(bv_label)
if not code_bv:
empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
return empty, "Bureau invalide", None, ""
election_key = history_map.get(election_label)
if not election_key:
empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
return empty, "Élection invalide", None, ""
try:
election_type, election_year, round_num = parse_election_key(election_key)
except Exception:
empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
return empty, "Élection invalide", None, ""
history_slice = backend.history[
(backend.history["code_bv"] == code_bv)
& (backend.history["election_type"] == election_type)
& (backend.history["election_year"] == election_year)
& (backend.history["round"] == round_num)
].copy()
if history_slice.empty:
empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
return empty, f"Source des données : {backend_label}", None, "Aucun résultat pour ce bureau."
table = prepare_history_table(history_slice)
plot = build_bar_chart(
table,
value_col="score_%",
ylabel="Score (%)",
color_map=DISPLAY_LABEL_COLORS,
)
meta = format_history_meta(history_slice)
return table, f"Source des données : {backend_label}", plot, meta
def _strategy_interval(
bv_label: str,
target_label: str,
inscrits_override: float | None,
band_label: str,
):
empty = pd.DataFrame(columns=INTERVAL_OUTPUT_COLUMNS)
if not bv_label or not target_label:
return empty, "Entrée invalide", None
code_bv = bureau_map.get(bv_label)
if not code_bv:
return empty, "Bureau invalide", None
target_type, target_year = _parse_target_label(target_label)
details, backend_label_local, _ = backend.predict_bureau_details(
code_bv,
target_type,
target_year,
inscrits_override,
)
if details is None:
return empty, backend_label_local, None
totals = details["totals"]
exprimes_total = int(totals.get("exprimes", 0))
table = build_interval_table(
details["shares_by_cat"],
exprimes_total,
residuals,
band_label,
)
plot = build_interval_chart(table, color_map=DISPLAY_LABEL_COLORS)
source = (
f"Intervalle CV ({band_label}) basé sur le modèle : {residual_model}"
if residuals
else "Intervalle CV indisponible (fallback ±3%)."
)
return table, source, plot
def _strategy_simulate(
bv_label: str,
target_label: str,
inscrits_override: float | None,
bloc_cible_label: str,
source_1: str,
target_1: str,
delta_1_val: float,
source_2: str,
target_2: str,
delta_2_val: float,
):
empty_sim = pd.DataFrame(columns=SIM_OUTPUT_COLUMNS)
empty_oppo = pd.DataFrame(columns=OPPORTUNITY_OUTPUT_COLUMNS)
if not bv_label or not target_label:
return empty_sim, None, empty_oppo
code_bv = bureau_map.get(bv_label)
if not code_bv:
return empty_sim, None, empty_oppo
target_type, target_year = _parse_target_label(target_label)
details, _, _ = backend.predict_bureau_details(
code_bv,
target_type,
target_year,
inscrits_override,
)
if details is None:
return empty_sim, None, empty_oppo
transfers = []
for src_label, dst_label, delta in [
(source_1, target_1, delta_1_val),
(source_2, target_2, delta_2_val),
]:
src_key = CATEGORY_LABEL_TO_KEY.get(src_label)
dst_key = CATEGORY_LABEL_TO_KEY.get(dst_label)
if src_key and dst_key and delta and delta > 0:
transfers.append((src_key, dst_key, float(delta)))
counts = details["counts"]
totals = details["totals"]
inscrits_total = int(totals.get("inscrits", 0))
updated = apply_transfers(counts, inscrits_total, transfers)
sim_table = build_simulation_table(counts, updated)
sim_plot = build_bar_chart(
sim_table,
value_col="apres_transfert",
ylabel="Nombre d'électeurs",
color_map=DISPLAY_LABEL_COLORS,
)
target_bloc = CATEGORY_LABEL_TO_KEY.get(bloc_cible_label, bloc_cible_label)
opp_rows = []
if target_bloc in ordered_categories():
for bv_code in backend.available_bureaux():
override = inscrits_override if bv_code == code_bv else None
bv_details, _, _ = backend.predict_bureau_details(
bv_code,
target_type,
target_year,
override,
)
if bv_details is None:
continue
base_counts = bv_details["counts"]
bv_totals = bv_details["totals"]
bv_inscrits = int(bv_totals.get("inscrits", 0))
updated_counts = apply_transfers(base_counts, bv_inscrits, transfers)
bloc_counts = {cat: int(base_counts.get(cat, 0)) for cat in ordered_categories()}
updated_blocs = {cat: int(updated_counts.get(cat, 0)) for cat in ordered_categories()}
top_base = max(bloc_counts, key=bloc_counts.get) if bloc_counts else None
top_after = max(updated_blocs, key=updated_blocs.get) if updated_blocs else None
gain = int(updated_counts.get(target_bloc, 0) - base_counts.get(target_bloc, 0))
opp_rows.append(
{
"bureau": bureau_label_by_code.get(bv_code, bv_code),
"gain_cible": gain,
"score_base": int(base_counts.get(target_bloc, 0)),
"score_apres": int(updated_counts.get(target_bloc, 0)),
"top_base": DISPLAY_CATEGORY_LABELS.get(top_base, top_base),
"top_apres": DISPLAY_CATEGORY_LABELS.get(top_after, top_after),
"bascule": "oui" if top_base != target_bloc and top_after == target_bloc else "non",
}
)
opp_df = pd.DataFrame(opp_rows, columns=OPPORTUNITY_OUTPUT_COLUMNS)
if not opp_df.empty:
opp_df = opp_df.sort_values(["bascule", "gain_cible"], ascending=[False, False])
return sim_table, sim_plot, opp_df
predict_inputs = [bureau_dd, target_dd, inscrits_in, abstention_in, blancs_in, nuls_in]
predict_inputs += [override_inputs[cat] for cat in ordered_categories() if cat in override_inputs]
predict_btn.click(_predict, inputs=predict_inputs, outputs=[output_df, source_box, chart])
history_btn.click(
_history,
inputs=[history_bureau_dd, history_election_dd],
outputs=[history_df, history_source, history_chart, history_meta],
)
map_btn.click(
_map,
inputs=[map_target_dd],
outputs=[map_html],
)
strategy_btn.click(
_strategy_interval,
inputs=[strategy_bureau_dd, strategy_target_dd, strategy_inscrits_in, interval_dd],
outputs=[interval_df, interval_source, interval_chart],
)
simulate_btn.click(
_strategy_simulate,
inputs=[
strategy_bureau_dd,
strategy_target_dd,
strategy_inscrits_in,
target_bloc_dd,
source_1_dd,
target_1_dd,
delta_1,
source_2_dd,
target_2_dd,
delta_2,
],
outputs=[sim_df, sim_chart, opportunity_df],
)
return demo
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
demo = create_interface()
demo.launch(server_name="0.0.0.0", server_port=7860)