from __future__ import annotations import base64 import io import json import logging import re import warnings from html import escape from pathlib import Path from typing import Dict, Tuple import gradio as gr import joblib import numpy as np import pandas as pd import sqlalchemy as sa from src.constants import CANDIDATE_CATEGORIES from src.db.schema import get_engine from src.features.build_features import ( aggregate_by_event, compute_national_reference, expand_by_category, load_elections_long, load_mapping, ) LOGGER = logging.getLogger(__name__) COMMUNE_CODE_SETE = "34301" MODEL_DIR = Path("models") FEATURE_COLS_PATH = MODEL_DIR / "feature_columns.json" RESIDUAL_INTERVALS_PATH = Path("reports/residual_intervals.json") GEO_DIR = Path("data/geo") DEFAULT_TARGETS = [ ("municipales", 2026), ("legislatives", 2027), ("presidentielles", 2027), ] FEATURE_CACHE: Dict[Tuple[str, int], Tuple[pd.DataFrame, Dict[str, Dict[Tuple[str, str], float]]]] = {} ELECTION_KEY_SEP = "|" ELECTION_TYPE_LABELS = { "municipales": "Municipales", "legislatives": "Législatives", "presidentielles": "Présidentielles", "europeennes": "Européennes", "regionales": "Régionales", "departementales": "Départementales", } HISTORY_OUTPUT_COLUMNS = ["categorie", "score_%"] PREDICTION_OUTPUT_COLUMNS = ["categorie", "nombre"] INTERVAL_OUTPUT_COLUMNS = ["categorie", "baseline_%", "min_%", "max_%", "baseline", "min", "max"] SIM_OUTPUT_COLUMNS = ["categorie", "baseline", "apres_transfert", "delta"] OPPORTUNITY_OUTPUT_COLUMNS = [ "bureau", "gain_cible", "score_base", "score_apres", "top_base", "top_apres", "bascule", ] DISPLAY_CATEGORY_ORDER = [ "extreme_gauche", "gauche_dure", "gauche_modere", "centre", "droite_modere", "droite_dure", "extreme_droite", ] PREDICTION_CATEGORY_ORDER = DISPLAY_CATEGORY_ORDER + ["blancs", "nuls", "abstention"] DISPLAY_CATEGORY_LABELS = { "extreme_gauche": "extrême-gauche", "gauche_dure": "gauche dure", "gauche_modere": "gauche modérée", "centre": "centre", "droite_modere": "droite modérée", "droite_dure": "droite dure", "extreme_droite": "extrême-droite", "blancs": "blancs", "nuls": "nuls", "abstention": "abstentions", } DISPLAY_CATEGORY_COLORS = { "extreme_gauche": "#7f1d1d", "gauche_dure": "#dc2626", "gauche_modere": "#f472b6", "centre": "#facc15", "droite_modere": "#60a5fa", "droite_dure": "#1e3a8a", "extreme_droite": "#111827", } EXTRA_CATEGORY_COLORS = { "blancs": "#e5e7eb", "nuls": "#9ca3af", "abstention": "#6b7280", } DISPLAY_LABEL_COLORS = { DISPLAY_CATEGORY_LABELS[key]: color for key, color in DISPLAY_CATEGORY_COLORS.items() } DISPLAY_LABEL_COLORS.update( {DISPLAY_CATEGORY_LABELS[key]: color for key, color in EXTRA_CATEGORY_COLORS.items()} ) CATEGORY_LABEL_TO_KEY = {label: key for key, label in DISPLAY_CATEGORY_LABELS.items()} TRANSFER_CATEGORY_LABELS = [DISPLAY_CATEGORY_LABELS[key] for key in PREDICTION_CATEGORY_ORDER] DEFAULT_RESIDUAL_SPREAD = 0.03 INTERVAL_BANDS = { "80% (p10-p90)": ("q10", "q90"), "90% (p05-p95)": ("q05", "q95"), } NEUTRAL_MARGIN_SHARE = 0.10 TYPE_HISTORY_BLEND = { "presidentielles": 0.4, "legislatives": 0.35, "europeennes": 0.3, "regionales": 0.3, "departementales": 0.3, "municipales": 0.2, } try: from numpy import RankWarning as NP_RANK_WARNING # type: ignore[attr-defined] except Exception: class NP_RANK_WARNING(UserWarning): pass def ordered_categories() -> list[str]: return [cat for cat in DISPLAY_CATEGORY_ORDER if cat in CANDIDATE_CATEGORIES] def load_residual_intervals(path: Path = RESIDUAL_INTERVALS_PATH) -> Dict[str, object]: if not path.exists(): return {} try: payload = json.loads(path.read_text()) except Exception: return {} if isinstance(payload, dict): return payload return {} def get_interval_bounds( residuals: Dict[str, Dict[str, float]], category: str, band_label: str, ) -> Tuple[float, float]: keys = INTERVAL_BANDS.get(band_label, ("q10", "q90")) cat_resid = residuals.get(category, {}) low = cat_resid.get(keys[0]) high = cat_resid.get(keys[1]) if low is None or high is None: return -DEFAULT_RESIDUAL_SPREAD, DEFAULT_RESIDUAL_SPREAD return float(low), float(high) def build_interval_table( shares_by_cat: Dict[str, float], exprimes_total: int, residuals: Dict[str, Dict[str, float]], band_label: str, ) -> pd.DataFrame: rows = [] for cat in ordered_categories(): share = float(shares_by_cat.get(cat, 0.0)) low_resid, high_resid = get_interval_bounds(residuals, cat, band_label) share_low = float(np.clip(share + low_resid, 0.0, 1.0)) share_high = float(np.clip(share + high_resid, 0.0, 1.0)) count = int(round(share * exprimes_total)) count_low = int(round(share_low * exprimes_total)) count_high = int(round(share_high * exprimes_total)) if count_low > count_high: count_low, count_high = count_high, count_low share_low, share_high = share_high, share_low rows.append( { "categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat), "baseline_%": round(share * 100, 1), "min_%": round(share_low * 100, 1), "max_%": round(share_high * 100, 1), "baseline": count, "min": count_low, "max": count_high, } ) return pd.DataFrame(rows, columns=INTERVAL_OUTPUT_COLUMNS) def build_interval_chart( df: pd.DataFrame, *, value_col: str = "baseline", low_col: str = "min", high_col: str = "max", color_map: Dict[str, str] | None = None, ylabel: str = "Nombre d'électeurs", ): try: import matplotlib.pyplot as plt except Exception: return None if df.empty or value_col not in df.columns: return None labels = df["categorie"].astype(str).tolist() values = df[value_col].astype(float).to_numpy() low_vals = df[low_col].astype(float).to_numpy() high_vals = df[high_col].astype(float).to_numpy() lower_err = np.maximum(0.0, values - low_vals) upper_err = np.maximum(0.0, high_vals - values) yerr = np.vstack([lower_err, upper_err]) colors = [color_map.get(label, "#3b82f6") for label in labels] if color_map else "#3b82f6" plt.figure(figsize=(6, 3)) plt.bar(labels, values, color=colors, yerr=yerr, capsize=4) plt.xticks(rotation=30, ha="right") plt.ylabel(ylabel) plt.tight_layout() return plt def blend_with_type_history( preds_by_cat: Dict[str, float], row: pd.Series, target_type: str, ) -> Dict[str, float]: base_weight = TYPE_HISTORY_BLEND.get(str(target_type).lower(), 0.0) if base_weight <= 0: return preds_by_cat available = 0 hist_vals: Dict[str, float | None] = {} for cat in CANDIDATE_CATEGORIES: val = row.get(f"prev_share_type_lag1_{cat}") if val is not None and not pd.isna(val): hist_vals[cat] = float(val) available += 1 else: hist_vals[cat] = None if available == 0: return preds_by_cat weight = base_weight * (available / len(CANDIDATE_CATEGORIES)) blended: Dict[str, float] = {} for cat in CANDIDATE_CATEGORIES: base = float(preds_by_cat.get(cat, 0.0)) hist = hist_vals.get(cat) if hist is None: blended[cat] = base else: blended[cat] = (1 - weight) * base + weight * hist total = sum(blended.values()) if total > 0: for cat in blended: blended[cat] /= total return blended def _normalize_override_pct(value: float | None) -> float | None: if value is None: return None try: val = float(value) except (TypeError, ValueError): return None if np.isnan(val): return None return float(np.clip(val, 0.0, 100.0)) def apply_share_overrides( preds_by_cat: Dict[str, float], overrides_pct: Dict[str, float] | None, ordered: list[str], ) -> Dict[str, float]: if not overrides_pct: return preds_by_cat fixed = {} for cat, pct in overrides_pct.items(): if cat not in ordered: continue norm = _normalize_override_pct(pct) if norm is None: continue fixed[cat] = norm / 100.0 if not fixed: return preds_by_cat fixed_sum = sum(fixed.values()) if fixed_sum >= 1.0: scaled = {cat: (val / fixed_sum) for cat, val in fixed.items() if fixed_sum > 0} return {cat: float(scaled.get(cat, 0.0)) for cat in ordered} remaining = 1.0 - fixed_sum residual_cats = [cat for cat in ordered if cat not in fixed] base_sum = sum(float(preds_by_cat.get(cat, 0.0)) for cat in residual_cats) if base_sum <= 0 and residual_cats: per_cat = remaining / len(residual_cats) base_alloc = {cat: per_cat for cat in residual_cats} else: base_alloc = { cat: (float(preds_by_cat.get(cat, 0.0)) / base_sum) * remaining for cat in residual_cats } merged = {cat: float(base_alloc.get(cat, 0.0)) for cat in ordered} for cat, val in fixed.items(): merged[cat] = float(val) return merged def apply_transfers( counts: Dict[str, int], total_inscrits: int, transfers: list[Tuple[str, str, float]], ) -> Dict[str, int]: updated = {key: int(value) for key, value in counts.items()} for source, target, delta_pct in transfers: if delta_pct <= 0 or source == target: continue delta_count = int(round(total_inscrits * float(delta_pct) / 100.0)) if delta_count <= 0: continue available = max(0, int(updated.get(source, 0))) moved = min(available, delta_count) updated[source] = available - moved updated[target] = int(updated.get(target, 0)) + moved return updated def build_simulation_table( baseline: Dict[str, int], updated: Dict[str, int], ) -> pd.DataFrame: rows = [] for cat in PREDICTION_CATEGORY_ORDER: base = int(baseline.get(cat, 0)) new = int(updated.get(cat, 0)) rows.append( { "categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat), "baseline": base, "apres_transfert": new, "delta": new - base, } ) return pd.DataFrame(rows, columns=SIM_OUTPUT_COLUMNS) def load_geojson_features(geo_dir: Path = GEO_DIR) -> list[dict]: if not geo_dir.exists(): return [] paths = sorted(geo_dir.glob("*.geojson")) + sorted(geo_dir.glob("*.json")) features: list[dict] = [] for path in paths: try: payload = json.loads(path.read_text()) except Exception: continue if isinstance(payload, dict): features.extend(payload.get("features", [])) return features def extract_bureau_number(label: str | None) -> int | None: if not label: return None match = re.search(r"(\d+)", str(label)) if not match: return None try: return int(match.group(1)) except ValueError: return None def match_bureau_code(commune_code: str, bureau_num: int, available_codes: set[str]) -> str: padded = str(bureau_num).zfill(4) candidates = [f"{commune_code}-{padded}", f"{commune_code}{padded}"] for candidate in candidates: if candidate in available_codes: return candidate return candidates[-1] def _iter_coords(geom: dict) -> list[Tuple[float, float]]: coords = [] geom_type = geom.get("type") if geom_type == "Polygon": for ring in geom.get("coordinates", []): coords.extend([(lon, lat) for lon, lat in ring]) elif geom_type == "MultiPolygon": for polygon in geom.get("coordinates", []): for ring in polygon: coords.extend([(lon, lat) for lon, lat in ring]) return coords def geojson_bounds(features: list[dict]) -> Tuple[Tuple[float, float], Tuple[float, float]] | None: lons = [] lats = [] for feature in features: geom = feature.get("geometry") or {} for lon, lat in _iter_coords(geom): lons.append(lon) lats.append(lat) if not lons or not lats: return None return (min(lats), min(lons)), (max(lats), max(lons)) def build_prediction_table_from_counts(counts_by_cat: Dict[str, int]) -> pd.DataFrame: rows = [] for cat in ordered_categories(): rows.append({"categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat), "nombre": int(counts_by_cat.get(cat, 0))}) for extra in ["blancs", "nuls", "abstention"]: rows.append( { "categorie": DISPLAY_CATEGORY_LABELS[extra], "nombre": int(counts_by_cat.get(extra, 0)), } ) return pd.DataFrame(rows, columns=PREDICTION_OUTPUT_COLUMNS) def chart_base64_from_df( df: pd.DataFrame, value_col: str, ylabel: str, color_map: Dict[str, str], ) -> str | None: try: import matplotlib.pyplot as plt except Exception: return None if df.empty or value_col not in df.columns: return None labels = df["categorie"].astype(str).tolist() values = pd.to_numeric(df[value_col], errors="coerce").fillna(0).tolist() colors = [color_map.get(label, "#3b82f6") for label in labels] fig, ax = plt.subplots(figsize=(4.5, 3.2)) ax.barh(labels, values, color=colors) ax.invert_yaxis() ax.set_xlabel(ylabel) ax.tick_params(axis="y", labelsize=8) fig.tight_layout() buf = io.BytesIO() fig.savefig(buf, format="png", dpi=150) plt.close(fig) return base64.b64encode(buf.getvalue()).decode("ascii") def build_map_popup_html( bureau_label: str, table_df: pd.DataFrame, chart_b64: str | None, meta: str | None, ) -> str: title_html = f"{escape(bureau_label)}" meta_html = f"
Folium n'est pas disponible. Installe-le via requirements.txt.
" features = load_geojson_features() if not features: return "Aucune geojson trouvée dans data/geo.
" bounds = geojson_bounds(features) if bounds is None: return "Impossible de calculer l'emprise de la carte.
" (min_lat, min_lon), (max_lat, max_lon) = bounds center = [(min_lat + max_lat) / 2, (min_lon + max_lon) / 2] fmap = folium.Map(location=center, zoom_start=13, tiles="cartodbpositron") available_codes = set(backend.available_bureaux()) for feature in features: props = feature.get("properties", {}) label = props.get("name") or "Bureau" bureau_num = extract_bureau_number(label) if bureau_num is None: code_bv = None else: code_bv = match_bureau_code(COMMUNE_CODE_SETE, bureau_num, available_codes) fill_color = "#9ca3af" popup_html = None if code_bv is not None: details, _, meta = backend.predict_bureau_details(code_bv, target_type, target_year) if details is not None: shares = details["shares_by_cat"] left_share = float(shares.get("gauche_dure", 0.0) + shares.get("gauche_modere", 0.0)) right_share = float(shares.get("droite_dure", 0.0) + shares.get("droite_modere", 0.0)) if abs(left_share - right_share) <= NEUTRAL_MARGIN_SHARE: fill_color = "#ffffff" else: winner = max(shares, key=shares.get) fill_color = DISPLAY_CATEGORY_COLORS.get(winner, fill_color) table_df = build_prediction_table_from_counts(details["counts"]) chart_b64 = chart_base64_from_df( table_df, value_col="nombre", ylabel="Nombre d'electeurs", color_map=DISPLAY_LABEL_COLORS, ) popup_html = build_map_popup_html(str(label), table_df, chart_b64, meta) def _style(_: dict, color=fill_color): return { "fillColor": color, "color": "#111827", "weight": 1, "fillOpacity": 0.6, } geo = folium.GeoJson(feature, style_function=_style) if popup_html: geo.add_child(folium.Popup(popup_html, max_width=450)) geo.add_child(folium.Tooltip(str(label))) geo.add_to(fmap) fmap.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]]) return fmap._repr_html_() def _project_rate( series: pd.Series, years: pd.Series, target_year: int, *, min_points_trend: int = 3, clamp_to_observed: bool = True, ) -> float | None: df = pd.DataFrame( { "value": pd.to_numeric(series, errors="coerce"), "year": pd.to_numeric(years, errors="coerce"), } ).dropna() if df.empty: return None values = df["value"].to_numpy() years_arr = df["year"].to_numpy() if len(set(years_arr)) >= min_points_trend and len(df) >= min_points_trend: with warnings.catch_warnings(): warnings.simplefilter("ignore", category=NP_RANK_WARNING) try: slope, intercept = np.polyfit(years_arr, values, 1) projected = slope * target_year + intercept except Exception: projected = values[-1] else: projected = values[-1] if clamp_to_observed and len(values): projected = min(max(projected, float(np.nanmin(values))), float(np.nanmax(values))) return float(min(1.0, max(0.0, projected))) def _allocate_counts(shares: np.ndarray, total: int) -> np.ndarray: if total <= 0 or shares.size == 0: return np.zeros_like(shares, dtype=int) shares = np.clip(shares, 0, None) if shares.sum() == 0: return np.zeros_like(shares, dtype=int) shares = shares / shares.sum() raw = shares * total floors = np.floor(raw) remainder = int(total - floors.sum()) if remainder > 0: order = np.argsort(-(raw - floors)) for idx in order[:remainder]: floors[idx] += 1 return floors.astype(int) def load_bureau_event_stats(commune_code: str) -> pd.DataFrame: candidates = [ Path("data/processed/elections_blocs.parquet"), Path("data/processed/elections_blocs.csv"), Path("data/interim/elections_long.parquet"), Path("data/interim/elections_long.csv"), ] df = pd.DataFrame() best = pd.DataFrame() for path in candidates: if not path.exists(): continue if path.suffix == ".parquet": df = pd.read_parquet(path) else: df = pd.read_csv(path, sep=";") if df.empty: continue if "type_scrutin" not in df.columns and "election_type" in df.columns: df["type_scrutin"] = df["election_type"] if "annee" not in df.columns and "election_year" in df.columns: df["annee"] = df["election_year"] if "tour" not in df.columns and "round" in df.columns: df["tour"] = df["round"] df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce") for col in ["inscrits", "votants", "blancs", "nuls"]: if col in df.columns: df[col] = pd.to_numeric(df[col], errors="coerce") else: df[col] = np.nan if "code_commune" in df.columns: df["code_commune"] = df["code_commune"].astype(str) df = df[df["code_commune"] == str(commune_code)] else: df = df[df["code_bv"].astype(str).str.startswith(str(commune_code))] df = df.dropna(subset=["code_bv"]) if df.empty: continue has_blancs = df["blancs"].notna().any() or df["nuls"].notna().any() if has_blancs: best = df break if best.empty: best = df df = best if df.empty: return df group_cols = [col for col in ["code_bv", "type_scrutin", "annee", "tour", "date_scrutin"] if col in df.columns] agg = df.groupby(group_cols, as_index=False).agg( inscrits=("inscrits", "max"), votants=("votants", "max"), blancs=("blancs", "max"), nuls=("nuls", "max"), ) if "date_scrutin" in agg.columns: agg = agg.sort_values("date_scrutin") agg["election_type"] = agg.get("type_scrutin") agg["election_type"] = agg["election_type"].astype("string").str.strip().str.lower() agg["election_year"] = pd.to_numeric(agg.get("annee"), errors="coerce") agg["round"] = pd.to_numeric(agg.get("tour"), errors="coerce").fillna(1).astype(int) base_inscrits = agg["inscrits"].replace(0, np.nan) agg["turnout_pct"] = agg["votants"] / base_inscrits agg["blancs_pct"] = agg["blancs"] / base_inscrits agg["nuls_pct"] = agg["nuls"] / base_inscrits return agg[ [ "code_bv", "election_type", "election_year", "round", "date_scrutin", "inscrits", "votants", "blancs", "nuls", "turnout_pct", "blancs_pct", "nuls_pct", ] ] def load_commune_event_stats(commune_code: str) -> pd.DataFrame: candidates = [ Path("data/processed/commune_event_stats.parquet"), Path("data/processed/commune_event_stats.csv"), ] df = pd.DataFrame() for path in candidates: if not path.exists(): continue if path.suffix == ".parquet": df = pd.read_parquet(path) else: df = pd.read_csv(path, sep=";") if not df.empty: break if df.empty: return df if "type_scrutin" not in df.columns and "election_type" in df.columns: df["type_scrutin"] = df["election_type"] if "annee" not in df.columns and "election_year" in df.columns: df["annee"] = df["election_year"] if "tour" not in df.columns and "round" in df.columns: df["tour"] = df["round"] df["date_scrutin"] = pd.to_datetime(df.get("date_scrutin"), errors="coerce") for col in ["inscrits", "votants", "blancs", "nuls"]: if col in df.columns: df[col] = pd.to_numeric(df[col], errors="coerce") else: df[col] = np.nan if "code_commune" in df.columns: df["code_commune"] = df["code_commune"].astype(str) df = df[df["code_commune"] == str(commune_code)] else: return pd.DataFrame() if df.empty: return df base_inscrits = df["inscrits"].replace(0, np.nan) if "turnout_pct" not in df.columns: df["turnout_pct"] = df["votants"] / base_inscrits if "blancs_pct" not in df.columns: df["blancs_pct"] = df["blancs"] / base_inscrits if "nuls_pct" not in df.columns: df["nuls_pct"] = df["nuls"] / base_inscrits df["election_type"] = df["type_scrutin"].astype("string").str.strip().str.lower() df["election_year"] = pd.to_numeric(df.get("annee"), errors="coerce") df["round"] = pd.to_numeric(df.get("tour"), errors="coerce").fillna(1).astype(int) return df[ [ "code_commune", "election_type", "election_year", "round", "date_scrutin", "inscrits", "votants", "blancs", "nuls", "turnout_pct", "blancs_pct", "nuls_pct", ] ] def format_backend_label(backend_kind: str) -> str: return "PostgreSQL" if backend_kind == "postgres" else "fichiers locaux" def format_election_type_label(election_type: str) -> str: label = ELECTION_TYPE_LABELS.get(election_type) if label: return label return str(election_type).replace("_", " ").title() def format_election_label( election_type: str, election_year: int, round_num: int, date_scrutin: pd.Timestamp | None = None, ) -> str: base = f"{format_election_type_label(election_type)} {election_year} - Tour {round_num}" if date_scrutin is None or pd.isna(date_scrutin): return base date_value = pd.to_datetime(date_scrutin).date().isoformat() return f"{base} ({date_value})" def format_election_key(election_type: str, election_year: int, round_num: int) -> str: return f"{election_type}{ELECTION_KEY_SEP}{election_year}{ELECTION_KEY_SEP}{round_num}" def parse_election_key(key: str) -> Tuple[str, int, int]: parts = key.split(ELECTION_KEY_SEP) if len(parts) != 3: raise ValueError(f"Clé d'élection invalide: {key!r}") return parts[0], int(parts[1]), int(parts[2]) def format_bureau_label(code_bv: str, bureau_label: str | None) -> str: code = str(code_bv) suffix = code.split("-")[-1] if "-" in code else code if bureau_label is not None and not pd.isna(bureau_label): label = str(bureau_label).strip() if label and label != code: return f"{label} ({code})" return f"Bureau {suffix} ({code})" def build_bureau_choices(history: pd.DataFrame) -> list[tuple[str, str]]: if history.empty: return [] if "bureau_label" in history.columns: label_map = ( history[["code_bv", "bureau_label"]] .dropna(subset=["code_bv"]) .drop_duplicates() .sort_values("code_bv") .groupby("code_bv", as_index=False)["bureau_label"] .first() ) return [ (format_bureau_label(row.code_bv, row.bureau_label), row.code_bv) for row in label_map.itertuples(index=False) ] codes = sorted(history["code_bv"].dropna().unique().tolist()) return [(format_bureau_label(code, None), code) for code in codes] def build_history_choices(history: pd.DataFrame) -> list[tuple[str, str]]: if history.empty: return [] events = ( history[["election_type", "election_year", "round", "date_scrutin"]] .dropna(subset=["election_type", "election_year", "round"]) .drop_duplicates() .groupby(["election_type", "election_year", "round"], as_index=False) .agg(date_scrutin=("date_scrutin", "min")) .sort_values(["election_year", "election_type", "round"]) ) return [ ( format_election_label( row.election_type, int(row.election_year), int(row.round), row.date_scrutin, ), format_election_key(row.election_type, int(row.election_year), int(row.round)), ) for row in events.itertuples(index=False) ] def clean_history_frame(history: pd.DataFrame) -> pd.DataFrame: if history.empty: return history clean = history.copy() clean["code_bv"] = clean["code_bv"].astype("string").str.strip() clean["election_type"] = clean["election_type"].astype("string").str.strip().str.lower() clean["category"] = clean["category"].astype("string").str.strip().str.lower() if "bureau_label" in clean.columns: clean["bureau_label"] = clean["bureau_label"].astype("string").str.strip() clean["election_year"] = pd.to_numeric(clean["election_year"], errors="coerce") clean["round"] = pd.to_numeric(clean["round"], errors="coerce").fillna(1) clean["date_scrutin"] = pd.to_datetime(clean["date_scrutin"], errors="coerce") for col in ["share", "share_nat", "turnout_pct"]: if col in clean.columns: clean[col] = pd.to_numeric(clean[col], errors="coerce").clip(lower=0, upper=1) clean = clean.dropna(subset=["code_bv", "election_type", "election_year", "round", "category"]) clean["election_year"] = clean["election_year"].astype(int) clean["round"] = clean["round"].astype(int) clean = clean[clean["category"].isin(CANDIDATE_CATEGORIES)] return clean def prepare_history_table(history_slice: pd.DataFrame) -> pd.DataFrame: if history_slice.empty: return pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS) grouped = history_slice.groupby("category", as_index=False).agg(share=("share", "sum")) clean = pd.DataFrame({"category": ordered_categories()}).merge(grouped, on="category", how="left") clean["share"] = pd.to_numeric(clean["share"], errors="coerce").fillna(0).clip(lower=0, upper=1) clean["score_%"] = (clean["share"] * 100).round(1) clean["categorie"] = clean["category"].map(DISPLAY_CATEGORY_LABELS).fillna(clean["category"]) return clean[HISTORY_OUTPUT_COLUMNS] def format_history_meta(history_slice: pd.DataFrame) -> str: if history_slice.empty: return "" parts = [] dates = history_slice["date_scrutin"].dropna() if not dates.empty: date_value = pd.to_datetime(dates.iloc[0]).date().isoformat() parts.append(f"Date du scrutin : {date_value}") turnout_vals = pd.to_numeric(history_slice["turnout_pct"], errors="coerce").dropna() if not turnout_vals.empty: parts.append(f"Participation : {turnout_vals.iloc[0] * 100:.1f}%") return " | ".join(parts) def _code_bv_full(commune_code: str, bureau_code: str) -> str: bureau_code = str(bureau_code).zfill(4) return f"{commune_code}-{bureau_code}" def load_history_from_db(commune_code: str) -> pd.DataFrame: engine = get_engine() query = sa.text( """ select cm.insee_code as commune_code, b.bureau_code, b.bureau_label, e.election_type, e.election_year, coalesce(e.round, 1) as round, e.date as date_scrutin, c.name as category, rl.share_pct, rl.turnout_pct, rn.share_pct as share_nat from results_local rl join bureaux b on rl.bureau_id = b.id join communes cm on b.commune_id = cm.id join elections e on rl.election_id = e.id join categories c on rl.category_id = c.id left join results_national rn on rn.election_id = e.id and rn.category_id = rl.category_id where cm.insee_code = :commune """ ) df = pd.read_sql(query, engine, params={"commune": commune_code}) if df.empty: raise RuntimeError("Aucune donnée dans la base pour la commune demandée.") df["code_bv"] = df.apply(lambda r: _code_bv_full(r["commune_code"], r["bureau_code"]), axis=1) df["date_scrutin"] = pd.to_datetime(df["date_scrutin"]) df["share"] = pd.to_numeric(df["share_pct"], errors="coerce") / 100 df["share_nat"] = pd.to_numeric(df["share_nat"], errors="coerce") / 100 df["turnout_pct"] = pd.to_numeric(df["turnout_pct"], errors="coerce") / 100 df["election_year"] = pd.to_numeric(df["election_year"], errors="coerce") df["round"] = pd.to_numeric(df["round"], errors="coerce").fillna(1).astype(int) return df[ [ "commune_code", "code_bv", "bureau_label", "election_type", "election_year", "round", "date_scrutin", "category", "share", "share_nat", "turnout_pct", ] ] def load_history_from_files(commune_code: str) -> pd.DataFrame: elections_long_all = load_elections_long( Path("data/interim/elections_long.parquet"), commune_code=commune_code, ) mapping = load_mapping(Path("data/mapping_candidats_blocs.csv")) expanded_all = expand_by_category(elections_long_all, mapping) local_all = aggregate_by_event(expanded_all) nat = compute_national_reference(local_all) local = local_all[local_all["commune_code"] == commune_code].copy() local = local.merge(nat, on=["election_type", "election_year", "round", "category"], how="left") # Columns already in aggregate_by_event/compute_national_reference if "share" not in local.columns: raise RuntimeError("Colonne share absente du dataset local (fallback fichiers).") local["bureau_label"] = None local["share_nat"] = local.get("share_nat") local["turnout_pct"] = local.get("turnout_pct") return local.rename( columns={ "annee": "election_year", "tour": "round", } )[ [ "commune_code", "code_bv", "bureau_label", "election_type", "election_year", "round", "date_scrutin", "category", "share", "share_nat", "turnout_pct", ] ] def references_from_history(history: pd.DataFrame, target_year: int) -> Dict[str, Dict[Tuple[str, str], float]]: hist = history[history["election_year"] < target_year].copy() leg = ( hist[hist["election_type"] == "legislatives"] .sort_values("date_scrutin") .groupby(["code_bv", "category"])["share"] .last() ) mun2020 = ( hist[(hist["election_type"] == "municipales") & (hist["election_year"] == 2020)] .sort_values("date_scrutin") .groupby(["code_bv", "category"])["share"] .last() ) return {"leg": leg.to_dict(), "mun2020": mun2020.to_dict()} def build_features_from_history(history: pd.DataFrame, target_type: str, target_year: int) -> pd.DataFrame: hist = history[history["election_year"] < target_year].copy() if hist.empty: return pd.DataFrame() hist = hist.sort_values("date_scrutin") hist["dev_to_nat"] = hist["share"] - hist["share_nat"] last_any_share = hist.groupby(["code_bv", "category"])["share"].last() last_any_dev = hist.groupby(["code_bv", "category"])["dev_to_nat"].last() last_type_share = ( hist[hist["election_type"] == target_type] .groupby(["code_bv", "category"])["share"] .last() ) last_type_dev = ( hist[hist["election_type"] == target_type] .groupby(["code_bv", "category"])["dev_to_nat"] .last() ) swing_any = ( hist.groupby(["code_bv", "category"])["share"] .apply(lambda s: s.iloc[-1] - s.iloc[-2] if len(s) >= 2 else np.nan) .rename("swing_any") ) turnout_any = hist.groupby("code_bv")["turnout_pct"].last() turnout_type = ( hist[hist["election_type"] == target_type] .groupby("code_bv")["turnout_pct"] .last() ) bureaux = sorted(hist["code_bv"].dropna().unique()) records = [] for code_bv in bureaux: record = { "commune_code": str(code_bv).split("-")[0], "code_bv": code_bv, "election_type": target_type, "election_year": target_year, "round": 1, "date_scrutin": f"{target_year}-01-01", "prev_turnout_any_lag1": turnout_any.get(code_bv, np.nan), "prev_turnout_same_type_lag1": turnout_type.get(code_bv, np.nan), } for cat in CANDIDATE_CATEGORIES: record[f"prev_share_any_lag1_{cat}"] = last_any_share.get((code_bv, cat), np.nan) record[f"prev_share_type_lag1_{cat}"] = last_type_share.get((code_bv, cat), np.nan) record[f"prev_dev_to_national_any_lag1_{cat}"] = last_any_dev.get((code_bv, cat), np.nan) record[f"prev_dev_to_national_type_lag1_{cat}"] = last_type_dev.get((code_bv, cat), np.nan) record[f"swing_any_{cat}"] = swing_any.get((code_bv, cat), np.nan) records.append(record) return pd.DataFrame.from_records(records) def load_model() -> Path: best_path = MODEL_DIR / "best_model.json" if best_path.exists(): try: payload = json.loads(best_path.read_text()) name = payload.get("name") if name: candidate = MODEL_DIR / f"{name}.joblib" if candidate.exists(): return candidate except Exception: pass if (MODEL_DIR / "hist_gradient_boosting.joblib").exists(): return MODEL_DIR / "hist_gradient_boosting.joblib" joblibs = sorted(MODEL_DIR.glob("*.joblib")) if not joblibs: raise FileNotFoundError("Aucun modèle trouvé dans models/. Lancez src/model/train.py.") return joblibs[0] def load_feature_columns(path: Path, df: pd.DataFrame) -> list[str]: if path.exists(): return json.loads(path.read_text()) exclude = {"commune_code", "code_bv", "election_type", "election_year", "round", "date_scrutin"} return [c for c in df.columns if c not in exclude] def format_delta(value) -> str: if value is None or (isinstance(value, float) and np.isnan(value)): return "N/A" sign = "+" if value >= 0 else "" return f"{sign}{round(value, 1)}" class PredictorBackend: def __init__(self, commune_code: str = COMMUNE_CODE_SETE): self.commune_code = commune_code self.backend = "local" try: self.history = load_history_from_db(commune_code) self.backend = "postgres" LOGGER.info("Backend PostgreSQL chargé (%s lignes)", len(self.history)) except Exception as exc: LOGGER.warning("PostgreSQL indisponible (%s) -> fallback fichiers.", exc) self.history = load_history_from_files(commune_code) self.backend = "files" LOGGER.info("Backend fichiers chargé (%s lignes)", len(self.history)) self.history = clean_history_frame(self.history) self.event_stats = load_bureau_event_stats(commune_code) self.commune_stats = load_commune_event_stats(commune_code) self.default_rates = {} self.default_rates_by_type: dict[str, dict[str, float]] = {} stats = self.commune_stats if not self.commune_stats.empty else self.event_stats if not stats.empty: if "round" in stats.columns: round1 = stats[stats["round"] == 1] if not round1.empty: stats = round1 self.default_rates = { "turnout_pct": float(stats["turnout_pct"].median(skipna=True)), "blancs_pct": float(stats["blancs_pct"].median(skipna=True)), "nuls_pct": float(stats["nuls_pct"].median(skipna=True)), } if "election_type" in stats.columns: for etype, group in stats.groupby("election_type"): self.default_rates_by_type[str(etype)] = { "turnout_pct": float(group["turnout_pct"].median(skipna=True)), "blancs_pct": float(group["blancs_pct"].median(skipna=True)), "nuls_pct": float(group["nuls_pct"].median(skipna=True)), } self.model_path = load_model() self.model = joblib.load(self.model_path) # feature cache per target self.refs_cache: Dict[Tuple[str, int], Dict[str, Dict[Tuple[str, str], float]]] = {} def available_bureaux(self) -> list[str]: return sorted(self.history["code_bv"].dropna().unique().tolist()) def available_targets(self) -> list[Tuple[str, int]]: existing = set() for row in self.history.itertuples(index=False): try: year = int(row.election_year) # type: ignore except Exception: continue existing.add((row.election_type, year)) for t in DEFAULT_TARGETS: existing.add(t) return sorted(existing, key=lambda x: (x[1], x[0])) def _get_features_and_refs(self, target_type: str, target_year: int) -> Tuple[pd.DataFrame, Dict[str, Dict[Tuple[str, str], float]]]: key = (target_type, target_year) if key not in FEATURE_CACHE: feature_df = build_features_from_history(self.history, target_type, target_year) refs = references_from_history(self.history, target_year) FEATURE_CACHE[key] = (feature_df, refs) return FEATURE_CACHE[key] def predict_bureau_details( self, code_bv: str, target_type: str, target_year: int, inscrits_override: float | None = None, share_overrides: Dict[str, float] | None = None, abstention_override_pct: float | None = None, blancs_override_pct: float | None = None, nuls_override_pct: float | None = None, ) -> Tuple[Dict[str, object] | None, str, str]: feature_df, _ = self._get_features_and_refs(target_type, target_year) if feature_df.empty: return None, "Données insuffisantes", "" row = feature_df[feature_df["code_bv"] == code_bv].copy() if row.empty: return None, "Bureau non trouvé dans l'historique.", "" feature_cols = load_feature_columns(FEATURE_COLS_PATH, feature_df) missing = [c for c in feature_cols if c not in row.columns] for col in missing: row[col] = np.nan preds = self.model.predict(row[feature_cols]) preds = np.clip(preds, 0, 1) sums = preds.sum(axis=1, keepdims=True) sums[sums == 0] = 1 preds = preds / sums preds_share = preds.flatten() preds_by_cat = {cat: float(preds_share[idx]) for idx, cat in enumerate(CANDIDATE_CATEGORIES)} preds_by_cat = blend_with_type_history(preds_by_cat, row.iloc[0], target_type) ordered = ordered_categories() preds_by_cat = apply_share_overrides(preds_by_cat, share_overrides, ordered) share_vec = np.array([preds_by_cat.get(cat, 0.0) for cat in ordered], dtype=float) stats = self.event_stats[self.event_stats["code_bv"] == code_bv].sort_values("date_scrutin") inscrits_used = None if inscrits_override is not None: try: value = float(inscrits_override) if value > 0: inscrits_used = value except (TypeError, ValueError): inscrits_used = None if inscrits_used is None and not stats.empty: serie = pd.to_numeric(stats["inscrits"], errors="coerce").dropna() if not serie.empty: inscrits_used = float(serie.iloc[-1]) if inscrits_used is None: return None, "Inscrits indisponibles pour ce bureau.", "" def pick_rate(col: str) -> float: default = self.default_rates.get(col, 0.0) default = 0.0 if default is None or np.isnan(default) else float(default) type_default = self.default_rates_by_type.get(target_type, {}).get(col) if type_default is None or np.isnan(type_default): type_default = default bureau_scoped = self.event_stats if not bureau_scoped.empty and "round" in bureau_scoped.columns: round1 = bureau_scoped[bureau_scoped["round"] == 1] if not round1.empty: bureau_scoped = round1 series = None years = None if ( not bureau_scoped.empty and col in bureau_scoped.columns and "election_type" in bureau_scoped.columns ): if target_type in bureau_scoped["election_type"].values: mask = bureau_scoped["election_type"] == target_type series = bureau_scoped.loc[mask, col] years = bureau_scoped.loc[mask, "election_year"] if series is None and not self.commune_stats.empty and col in self.commune_stats.columns: commune_scoped = self.commune_stats if "round" in commune_scoped.columns: round1 = commune_scoped[commune_scoped["round"] == 1] if not round1.empty: commune_scoped = round1 if target_type in commune_scoped["election_type"].values: mask = commune_scoped["election_type"] == target_type series = commune_scoped.loc[mask, col] years = commune_scoped.loc[mask, "election_year"] else: series = commune_scoped[col] years = commune_scoped["election_year"] if series is None: if bureau_scoped.empty or col not in bureau_scoped.columns: return type_default series = bureau_scoped[col] years = bureau_scoped["election_year"] rate = _project_rate(series, years, target_year) if rate is None or np.isnan(rate): return type_default return float(rate) turnout_rate = pick_rate("turnout_pct") blancs_rate = pick_rate("blancs_pct") nuls_rate = pick_rate("nuls_pct") abstention_override = _normalize_override_pct(abstention_override_pct) if abstention_override is not None: turnout_rate = float(np.clip(1.0 - (abstention_override / 100.0), 0.0, 1.0)) blancs_override = _normalize_override_pct(blancs_override_pct) if blancs_override is not None: blancs_rate = float(blancs_override / 100.0) nuls_override = _normalize_override_pct(nuls_override_pct) if nuls_override is not None: nuls_rate = float(nuls_override / 100.0) if blancs_rate + nuls_rate > turnout_rate and (blancs_rate + nuls_rate) > 0: scale = turnout_rate / (blancs_rate + nuls_rate) blancs_rate *= scale nuls_rate *= scale inscrits_total = int(round(inscrits_used)) votants_total = int(round(inscrits_total * turnout_rate)) blancs_total = int(round(inscrits_total * blancs_rate)) nuls_total = int(round(inscrits_total * nuls_rate)) if blancs_total + nuls_total > votants_total and (blancs_total + nuls_total) > 0: scale = votants_total / (blancs_total + nuls_total) blancs_total = int(round(blancs_total * scale)) nuls_total = int(round(nuls_total * scale)) exprimes_total = max(0, votants_total - blancs_total - nuls_total) abstention_total = max(0, inscrits_total - votants_total) bloc_counts = _allocate_counts(share_vec, exprimes_total) counts_by_cat = {cat: int(count) for cat, count in zip(ordered, bloc_counts)} counts_by_cat.update( { "blancs": int(blancs_total), "nuls": int(nuls_total), "abstention": int(abstention_total), } ) backend_label = format_backend_label(self.backend) meta = ( f"Inscrits utilisés : {inscrits_total} | Votants : {votants_total} | " f"Blancs : {blancs_total} | Nuls : {nuls_total} | Abstentions : {abstention_total}" ) details = { "shares_by_cat": preds_by_cat, "share_vec": share_vec, "ordered": ordered, "counts": counts_by_cat, "totals": { "inscrits": inscrits_total, "votants": votants_total, "blancs": blancs_total, "nuls": nuls_total, "abstention": abstention_total, "exprimes": exprimes_total, }, } return details, backend_label, meta def predict_bureau( self, code_bv: str, target_type: str, target_year: int, inscrits_override: float | None = None, share_overrides: Dict[str, float] | None = None, abstention_override_pct: float | None = None, blancs_override_pct: float | None = None, nuls_override_pct: float | None = None, ) -> Tuple[pd.DataFrame, str, str]: details, backend_label, meta = self.predict_bureau_details( code_bv, target_type, target_year, inscrits_override, share_overrides, abstention_override_pct, blancs_override_pct, nuls_override_pct, ) if details is None: return pd.DataFrame(), backend_label, "" counts_by_cat = details["counts"] ordered = details["ordered"] rows = [] for cat in ordered: rows.append( { "categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat), "nombre": int(counts_by_cat.get(cat, 0)), } ) for extra in ["blancs", "nuls", "abstention"]: rows.append( { "categorie": DISPLAY_CATEGORY_LABELS[extra], "nombre": int(counts_by_cat.get(extra, 0)), } ) return pd.DataFrame(rows), backend_label, meta def build_bar_chart( df: pd.DataFrame, value_col: str, *, color: str = "#3b82f6", color_map: Dict[str, str] | None = None, category_col: str = "categorie", ylabel: str = "Score (%)", ): try: import matplotlib.pyplot as plt except Exception: return None if df.empty or value_col not in df.columns: return None plt.figure(figsize=(6, 3)) labels = df[category_col].astype(str).tolist() if category_col in df.columns else [] if color_map: colors = [color_map.get(label, color) for label in labels] else: colors = color plt.bar(labels, df[value_col], color=colors) plt.xticks(rotation=30, ha="right") plt.ylabel(ylabel) plt.tight_layout() return plt def create_interface() -> gr.Blocks: backend = PredictorBackend() bureau_choices = build_bureau_choices(backend.history) bureau_labels = [label for label, _ in bureau_choices] bureau_map = {label: value for label, value in bureau_choices} bureau_label_by_code = {value: label for label, value in bureau_choices} targets = backend.available_targets() target_labels = [f"{t} {y}" for t, y in targets] history_choices = build_history_choices(backend.history) history_labels = [label for label, _ in history_choices] history_map = {label: value for label, value in history_choices} if ("municipales", 2026) in targets: default_target = "municipales 2026" elif targets: default_target = f"{targets[-1][0]} {targets[-1][1]}" else: default_target = "municipales 2026" default_bv = bureau_labels[0] if bureau_labels else None default_history = history_labels[-1] if history_labels else None backend_label = format_backend_label(backend.backend) residual_payload = load_residual_intervals() residuals = residual_payload.get("residuals", {}) if isinstance(residual_payload, dict) else {} residual_model = residual_payload.get("model", "inconnu") if isinstance(residual_payload, dict) else "inconnu" interval_choices = list(INTERVAL_BANDS.keys()) or ["80% (p10-p90)"] interval_default = interval_choices[0] bloc_labels = [DISPLAY_CATEGORY_LABELS.get(cat, cat) for cat in ordered_categories()] with gr.Blocks(title="Prévision Municipales — Ville de Sète") as demo: gr.Markdown( """ # Prévision Municipales — Ville de Sète Choisissez un bureau de vote et une élection cible. Le modèle estime un volume par catégorie politique, ainsi que les abstentions, blancs et nuls. Auteur : [Stéphane Manet](https://manet-conseil.fr) - [Linkedin](https://www.linkedin.com/in/stephanemanet) - [GitHub](https://github.com/stephmnt) """ ) with gr.Tabs(): with gr.Tab("Prévisions"): with gr.Row(): bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote") target_dd = gr.Dropdown(choices=target_labels, value=default_target, label="Élection cible (type année)") inscrits_in = gr.Number(value=None, label="Inscrits (optionnel)", precision=0) override_inputs: Dict[str, gr.Number] = {} with gr.Accordion("Imputation manuelle (optionnel)", open=False): gr.Markdown("Abstention / blancs / nuls en % des inscrits.") with gr.Row(): abstention_in = gr.Number(value=40, label="Abstention (% inscrits)", precision=1) blancs_in = gr.Number(value=None, label="Blancs (% inscrits)", precision=1) nuls_in = gr.Number(value=None, label="Nuls (% inscrits)", precision=1) gr.Markdown("Nuances politiques en % des exprimés (laisser vide pour garder le modèle).") cats = ordered_categories() with gr.Row(): for cat in cats[:4]: override_inputs[cat] = gr.Number( value=None, label=DISPLAY_CATEGORY_LABELS.get(cat, cat), precision=1, ) with gr.Row(): for cat in cats[4:]: override_inputs[cat] = gr.Number( value=None, label=DISPLAY_CATEGORY_LABELS.get(cat, cat), precision=1, ) predict_btn = gr.Button("Prédire") source_box = gr.Markdown(value=f"Source des données : {backend_label}") output_df = gr.Dataframe( headers=PREDICTION_OUTPUT_COLUMNS, label="Prédictions (nombres)", ) chart = gr.Plot() with gr.Tab("Historique"): gr.Markdown( """ Consultation des résultats passés (sans machine learning). Sélectionnez un bureau et une élection pour afficher l'histogramme des parts par tendance politique. """ ) with gr.Row(): history_bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote") history_election_dd = gr.Dropdown( choices=history_labels, value=default_history, label="Élection (type année tour)", ) history_btn = gr.Button("Afficher l'historique") history_source = gr.Markdown(value=f"Source des données : {backend_label}") history_df = gr.Dataframe(headers=HISTORY_OUTPUT_COLUMNS, label="Résultats historiques") history_chart = gr.Plot() history_meta = gr.Markdown() with gr.Tab("Carte"): gr.Markdown( """ Carte des bureaux de vote de Sète. Cliquez sur un polygone pour afficher la prédiction (table + graphique). """ ) map_legend = gr.HTML(value=build_map_legend_html()) with gr.Row(): map_target_dd = gr.Dropdown( choices=target_labels, value=default_target, label="Élection cible (type année)", ) map_btn = gr.Button("Afficher la carte") map_html = gr.HTML(value="Cliquez sur 'Afficher la carte' pour charger la carte.
") with gr.Tab("Stratégie"): gr.Markdown( """ Analyse stratégique par bureau : intervalles d'incertitude issus des résidus CV, puis simulateur de transferts pour estimer des bascules potentielles. """ ) with gr.Row(): strategy_bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote") strategy_target_dd = gr.Dropdown( choices=target_labels, value=default_target, label="Élection cible (type année)", ) strategy_inscrits_in = gr.Number(value=None, label="Inscrits (optionnel)", precision=0) interval_dd = gr.Dropdown( choices=interval_choices, value=interval_default, label="Intervalle CV", ) strategy_btn = gr.Button("Analyser l'incertitude") interval_source = gr.Markdown( value=( f"Intervalle CV basé sur le modèle : {residual_model}" if residuals else "Intervalle CV indisponible (fallback ±3%)." ) ) interval_df = gr.Dataframe( headers=INTERVAL_OUTPUT_COLUMNS, label="Plage empirique par bloc", ) interval_chart = gr.Plot() gr.Markdown("### Simulateur de transferts (points d'inscrits)") with gr.Row(): target_bloc_dd = gr.Dropdown(choices=bloc_labels, value=bloc_labels[0] if bloc_labels else None, label="Bloc cible") with gr.Row(): source_1_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["abstention"], label="Source 1") target_1_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["droite_dure"], label="Cible 1") delta_1 = gr.Slider(minimum=0, maximum=10, value=3, step=0.1, label="Delta 1 (points %)") with gr.Row(): source_2_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["droite_modere"], label="Source 2") target_2_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["gauche_modere"], label="Cible 2") delta_2 = gr.Slider(minimum=0, maximum=10, value=3, step=0.1, label="Delta 2 (points %)") simulate_btn = gr.Button("Simuler les transferts") sim_df = gr.Dataframe(headers=SIM_OUTPUT_COLUMNS, label="Simulation par catégorie") sim_chart = gr.Plot() opportunity_df = gr.Dataframe(headers=OPPORTUNITY_OUTPUT_COLUMNS, label="Bureaux à potentiel (trié)") def _predict( bv_label: str, target_label: str, inscrits_override: float | None, abstention_override: float | None, blancs_override: float | None, nuls_override: float | None, *cat_overrides: float, ): if not bv_label or not target_label: return pd.DataFrame(), "Entrée invalide", None code_bv = bureau_map.get(bv_label) if not code_bv: return pd.DataFrame(), "Bureau invalide", None try: parts = target_label.split() target_type, target_year = parts[0].lower(), int(parts[1]) except Exception: target_type, target_year = "municipales", 2026 share_overrides: Dict[str, float] = {} for cat, value in zip(ordered_categories(), cat_overrides): norm = _normalize_override_pct(value) if norm is None: continue share_overrides[cat] = norm df, backend_label, meta = backend.predict_bureau( code_bv, target_type, target_year, inscrits_override, share_overrides=share_overrides if share_overrides else None, abstention_override_pct=abstention_override, blancs_override_pct=blancs_override, nuls_override_pct=nuls_override, ) plot = build_bar_chart( df, value_col="nombre", ylabel="Nombre d'électeurs", color_map=DISPLAY_LABEL_COLORS, ) meta_label = f" | {meta}" if meta else "" return df, f"Source des données : {backend_label}{meta_label}", plot def _parse_target_label(target_label: str) -> Tuple[str, int]: try: parts = target_label.split() return parts[0].lower(), int(parts[1]) except Exception: return "municipales", 2026 def _map(target_label: str): if not target_label: return "Élection invalide.
" target_type, target_year = _parse_target_label(target_label) return build_bureau_map_html(backend, target_type, target_year) def _history(bv_label: str, election_label: str): if not bv_label or not election_label: empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS) return empty, "Entrée invalide", None, "" code_bv = bureau_map.get(bv_label) if not code_bv: empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS) return empty, "Bureau invalide", None, "" election_key = history_map.get(election_label) if not election_key: empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS) return empty, "Élection invalide", None, "" try: election_type, election_year, round_num = parse_election_key(election_key) except Exception: empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS) return empty, "Élection invalide", None, "" history_slice = backend.history[ (backend.history["code_bv"] == code_bv) & (backend.history["election_type"] == election_type) & (backend.history["election_year"] == election_year) & (backend.history["round"] == round_num) ].copy() if history_slice.empty: empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS) return empty, f"Source des données : {backend_label}", None, "Aucun résultat pour ce bureau." table = prepare_history_table(history_slice) plot = build_bar_chart( table, value_col="score_%", ylabel="Score (%)", color_map=DISPLAY_LABEL_COLORS, ) meta = format_history_meta(history_slice) return table, f"Source des données : {backend_label}", plot, meta def _strategy_interval( bv_label: str, target_label: str, inscrits_override: float | None, band_label: str, ): empty = pd.DataFrame(columns=INTERVAL_OUTPUT_COLUMNS) if not bv_label or not target_label: return empty, "Entrée invalide", None code_bv = bureau_map.get(bv_label) if not code_bv: return empty, "Bureau invalide", None target_type, target_year = _parse_target_label(target_label) details, backend_label_local, _ = backend.predict_bureau_details( code_bv, target_type, target_year, inscrits_override, ) if details is None: return empty, backend_label_local, None totals = details["totals"] exprimes_total = int(totals.get("exprimes", 0)) table = build_interval_table( details["shares_by_cat"], exprimes_total, residuals, band_label, ) plot = build_interval_chart(table, color_map=DISPLAY_LABEL_COLORS) source = ( f"Intervalle CV ({band_label}) basé sur le modèle : {residual_model}" if residuals else "Intervalle CV indisponible (fallback ±3%)." ) return table, source, plot def _strategy_simulate( bv_label: str, target_label: str, inscrits_override: float | None, bloc_cible_label: str, source_1: str, target_1: str, delta_1_val: float, source_2: str, target_2: str, delta_2_val: float, ): empty_sim = pd.DataFrame(columns=SIM_OUTPUT_COLUMNS) empty_oppo = pd.DataFrame(columns=OPPORTUNITY_OUTPUT_COLUMNS) if not bv_label or not target_label: return empty_sim, None, empty_oppo code_bv = bureau_map.get(bv_label) if not code_bv: return empty_sim, None, empty_oppo target_type, target_year = _parse_target_label(target_label) details, _, _ = backend.predict_bureau_details( code_bv, target_type, target_year, inscrits_override, ) if details is None: return empty_sim, None, empty_oppo transfers = [] for src_label, dst_label, delta in [ (source_1, target_1, delta_1_val), (source_2, target_2, delta_2_val), ]: src_key = CATEGORY_LABEL_TO_KEY.get(src_label) dst_key = CATEGORY_LABEL_TO_KEY.get(dst_label) if src_key and dst_key and delta and delta > 0: transfers.append((src_key, dst_key, float(delta))) counts = details["counts"] totals = details["totals"] inscrits_total = int(totals.get("inscrits", 0)) updated = apply_transfers(counts, inscrits_total, transfers) sim_table = build_simulation_table(counts, updated) sim_plot = build_bar_chart( sim_table, value_col="apres_transfert", ylabel="Nombre d'électeurs", color_map=DISPLAY_LABEL_COLORS, ) target_bloc = CATEGORY_LABEL_TO_KEY.get(bloc_cible_label, bloc_cible_label) opp_rows = [] if target_bloc in ordered_categories(): for bv_code in backend.available_bureaux(): override = inscrits_override if bv_code == code_bv else None bv_details, _, _ = backend.predict_bureau_details( bv_code, target_type, target_year, override, ) if bv_details is None: continue base_counts = bv_details["counts"] bv_totals = bv_details["totals"] bv_inscrits = int(bv_totals.get("inscrits", 0)) updated_counts = apply_transfers(base_counts, bv_inscrits, transfers) bloc_counts = {cat: int(base_counts.get(cat, 0)) for cat in ordered_categories()} updated_blocs = {cat: int(updated_counts.get(cat, 0)) for cat in ordered_categories()} top_base = max(bloc_counts, key=bloc_counts.get) if bloc_counts else None top_after = max(updated_blocs, key=updated_blocs.get) if updated_blocs else None gain = int(updated_counts.get(target_bloc, 0) - base_counts.get(target_bloc, 0)) opp_rows.append( { "bureau": bureau_label_by_code.get(bv_code, bv_code), "gain_cible": gain, "score_base": int(base_counts.get(target_bloc, 0)), "score_apres": int(updated_counts.get(target_bloc, 0)), "top_base": DISPLAY_CATEGORY_LABELS.get(top_base, top_base), "top_apres": DISPLAY_CATEGORY_LABELS.get(top_after, top_after), "bascule": "oui" if top_base != target_bloc and top_after == target_bloc else "non", } ) opp_df = pd.DataFrame(opp_rows, columns=OPPORTUNITY_OUTPUT_COLUMNS) if not opp_df.empty: opp_df = opp_df.sort_values(["bascule", "gain_cible"], ascending=[False, False]) return sim_table, sim_plot, opp_df predict_inputs = [bureau_dd, target_dd, inscrits_in, abstention_in, blancs_in, nuls_in] predict_inputs += [override_inputs[cat] for cat in ordered_categories() if cat in override_inputs] predict_btn.click(_predict, inputs=predict_inputs, outputs=[output_df, source_box, chart]) history_btn.click( _history, inputs=[history_bureau_dd, history_election_dd], outputs=[history_df, history_source, history_chart, history_meta], ) map_btn.click( _map, inputs=[map_target_dd], outputs=[map_html], ) strategy_btn.click( _strategy_interval, inputs=[strategy_bureau_dd, strategy_target_dd, strategy_inscrits_in, interval_dd], outputs=[interval_df, interval_source, interval_chart], ) simulate_btn.click( _strategy_simulate, inputs=[ strategy_bureau_dd, strategy_target_dd, strategy_inscrits_in, target_bloc_dd, source_1_dd, target_1_dd, delta_1, source_2_dd, target_2_dd, delta_2, ], outputs=[sim_df, sim_chart, opportunity_df], ) return demo if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") demo = create_interface() demo.launch(server_name="0.0.0.0", server_port=7860)