Spaces:

stephmnt
/

bdv

Sleeping

App Files Files Community

bdv / app /gradio_app.py

stephmnt

Sync from GitHub Actions

047370a verified about 2 months ago

raw

history blame contribute delete

72.4 kB

	from __future__ import annotations

	import base64
	import io
	import json
	import logging
	import re
	import warnings
	from html import escape
	from pathlib import Path
	from typing import Dict, Tuple

	import gradio as gr
	import joblib
	import numpy as np
	import pandas as pd
	import sqlalchemy as sa

	from src.constants import CANDIDATE_CATEGORIES
	from src.db.schema import get_engine
	from src.features.build_features import (
	aggregate_by_event,
	compute_national_reference,
	expand_by_category,
	load_elections_long,
	load_mapping,
	)

	# Module-level logger and static configuration shared by the helpers below.
	LOGGER = logging.getLogger(__name__)
	# Commune code used as the default for PredictorBackend; the database loader
	# matches it against the `insee_code` column.
	COMMUNE_CODE_SETE = "34301"
	# Locations of the trained models and auxiliary artefacts (relative to the CWD).
	MODEL_DIR = Path("models")
	FEATURE_COLS_PATH = MODEL_DIR / "feature_columns.json"
	RESIDUAL_INTERVALS_PATH = Path("reports/residual_intervals.json")
	GEO_DIR = Path("data/geo")
	# (election type, year) pairs -- presumably the targets offered by default in
	# the UI; TODO confirm against the part of the file that consumes them.
	DEFAULT_TARGETS = [
	    ("municipales", 2026),
	    ("legislatives", 2027),
	    ("presidentielles", 2027),
	]
	# Cache keyed by (election type, year); values pair a feature frame with
	# reference-share dictionaries.
	FEATURE_CACHE: Dict[Tuple[str, int], Tuple[pd.DataFrame, Dict[str, Dict[Tuple[str, str], float]]]] = {}
	# Separator between the fields of an election key (type, year, round).
	# The previous literal "\|" was an invalid escape sequence (SyntaxWarning on
	# recent Python versions) and produced a two-character separator.
	ELECTION_KEY_SEP = "|"
	# Display label (French) for each normalised election-type key; used by
	# format_election_type_label, which falls back to a title-cased key.
	ELECTION_TYPE_LABELS = {
	"municipales": "Municipales",
	"legislatives": "Législatives",
	"presidentielles": "Présidentielles",
	"europeennes": "Européennes",
	"regionales": "Régionales",
	"departementales": "Départementales",
	}
	# Column layouts of the tables built by the helpers in this module.
	# History table: one row per category with its score in percent.
	HISTORY_OUTPUT_COLUMNS = ["categorie", "score_%"]
	# Prediction table: one row per category with a count.
	PREDICTION_OUTPUT_COLUMNS = ["categorie", "nombre"]
	# Interval table: baseline/min/max both as percentages and as counts.
	INTERVAL_OUTPUT_COLUMNS = ["categorie", "baseline_%", "min_%", "max_%", "baseline", "min", "max"]
	# Transfer simulation table: counts before/after and their difference.
	SIM_OUTPUT_COLUMNS = ["categorie", "baseline", "apres_transfert", "delta"]
	# NOTE(review): the builder of the "opportunity" table is not visible in this
	# part of the file; column meanings are inferred from their names only.
	OPPORTUNITY_OUTPUT_COLUMNS = [
	"bureau",
	"gain_cible",
	"score_base",
	"score_apres",
	"top_base",
	"top_apres",
	"bascule",
	]
	# Order in which candidate categories are displayed (see ordered_categories).
	DISPLAY_CATEGORY_ORDER = [
	"extreme_gauche",
	"gauche_dure",
	"gauche_modere",
	"centre",
	"droite_modere",
	"droite_dure",
	"extreme_droite",
	]
	# Same order, followed by the non-candidate buckets shown in prediction tables.
	PREDICTION_CATEGORY_ORDER = DISPLAY_CATEGORY_ORDER + ["blancs", "nuls", "abstention"]
	# Category key -> label shown to the user.
	DISPLAY_CATEGORY_LABELS = {
	"extreme_gauche": "extrême-gauche",
	"gauche_dure": "gauche dure",
	"gauche_modere": "gauche modérée",
	"centre": "centre",
	"droite_modere": "droite modérée",
	"droite_dure": "droite dure",
	"extreme_droite": "extrême-droite",
	"blancs": "blancs",
	"nuls": "nuls",
	"abstention": "abstentions",
	}
	# Category key -> hex colour used for map fills and the legend.
	DISPLAY_CATEGORY_COLORS = {
	"extreme_gauche": "#7f1d1d",
	"gauche_dure": "#dc2626",
	"gauche_modere": "#f472b6",
	"centre": "#facc15",
	"droite_modere": "#60a5fa",
	"droite_dure": "#1e3a8a",
	"extreme_droite": "#111827",
	}
	# Colours for the non-candidate buckets (only used in label-keyed charts).
	EXTRA_CATEGORY_COLORS = {
	"blancs": "#e5e7eb",
	"nuls": "#9ca3af",
	"abstention": "#6b7280",
	}
	# Display label -> colour, covering candidate categories first ...
	DISPLAY_LABEL_COLORS = {
	DISPLAY_CATEGORY_LABELS[key]: color for key, color in DISPLAY_CATEGORY_COLORS.items()
	}
	# ... then the extra buckets.
	DISPLAY_LABEL_COLORS.update(
	{DISPLAY_CATEGORY_LABELS[key]: color for key, color in EXTRA_CATEGORY_COLORS.items()}
	)
	# Reverse lookup: display label -> category key.
	CATEGORY_LABEL_TO_KEY = {label: key for key, label in DISPLAY_CATEGORY_LABELS.items()}
	# Display labels in prediction order -- presumably the choices offered for
	# transfer source/target; TODO confirm against the UI code.
	TRANSFER_CATEGORY_LABELS = [DISPLAY_CATEGORY_LABELS[key] for key in PREDICTION_CATEGORY_ORDER]
	# Symmetric +/- share offset used by get_interval_bounds when no residual
	# quantiles are available for a category.
	DEFAULT_RESIDUAL_SPREAD = 0.03
	# Band label -> (lower, upper) residual-quantile keys read by get_interval_bounds.
	INTERVAL_BANDS = {
	"80% (p10-p90)": ("q10", "q90"),
	"90% (p05-p95)": ("q05", "q95"),
	}
	# Maximum |left - right| share gap for which the map paints a bureau white.
	NEUTRAL_MARGIN_SHARE = 0.10
	# Per election type: base weight given to the previous same-type result when
	# blending it with the model prediction (see blend_with_type_history).
	TYPE_HISTORY_BLEND = {
	"presidentielles": 0.4,
	"legislatives": 0.35,
	"europeennes": 0.3,
	"regionales": 0.3,
	"departementales": 0.3,
	"municipales": 0.2,
	}

	# Resolve the warning class that np.polyfit emits for poorly conditioned fits
	# so _project_rate can silence it.  NumPy >= 1.25 exposes it under
	# numpy.exceptions and NumPy 2.0 removed the top-level alias; importing only
	# the old name left a dummy class behind, so the filter suppressed nothing.
	try:
	    from numpy.exceptions import RankWarning as NP_RANK_WARNING
	except ImportError:
	    try:
	        from numpy import RankWarning as NP_RANK_WARNING  # type: ignore[attr-defined]
	    except Exception:

	        class NP_RANK_WARNING(UserWarning):
	            """Placeholder used when NumPy exposes no RankWarning class."""


	def ordered_categories() -> list[str]:
	    """Return the candidate categories in display order.

	    Walks ``DISPLAY_CATEGORY_ORDER`` and keeps only the keys that are also
	    present in ``CANDIDATE_CATEGORIES``.
	    """
	    selected: list[str] = []
	    for name in DISPLAY_CATEGORY_ORDER:
	        if name in CANDIDATE_CATEGORIES:
	            selected.append(name)
	    return selected


	def load_residual_intervals(path: Path = RESIDUAL_INTERVALS_PATH) -> Dict[str, object]:
	    """Load the residual-interval report stored as a JSON object.

	    Args:
	        path: JSON file to read.

	    Returns:
	        The decoded object when the file exists and holds a JSON object;
	        an empty dict when the file is missing, unreadable, malformed or
	        holds another JSON type.
	    """
	    try:
	        # JSON is UTF-8 by specification; do not depend on the locale default.
	        payload = json.loads(path.read_text(encoding="utf-8"))
	    except FileNotFoundError:
	        return {}
	    except (OSError, ValueError) as exc:
	        # Best effort: the caller falls back to default spreads, but a corrupt
	        # report should at least be visible in the logs.
	        LOGGER.warning("Could not read residual intervals from %s: %s", path, exc)
	        return {}
	    return payload if isinstance(payload, dict) else {}


	def get_interval_bounds(
	    residuals: Dict[str, Dict[str, float]],
	    category: str,
	    band_label: str,
	) -> Tuple[float, float]:
	    """Return the (low, high) residual offsets for a category and band.

	    The band label selects a pair of quantile keys from ``INTERVAL_BANDS``
	    (defaulting to ``("q10", "q90")``).  When either quantile is missing for
	    the category, the symmetric ``DEFAULT_RESIDUAL_SPREAD`` is returned.
	    """
	    low_key, high_key = INTERVAL_BANDS.get(band_label, ("q10", "q90"))
	    quantiles = residuals.get(category, {})
	    lower = quantiles.get(low_key)
	    upper = quantiles.get(high_key)
	    if lower is not None and upper is not None:
	        return float(lower), float(upper)
	    return -DEFAULT_RESIDUAL_SPREAD, DEFAULT_RESIDUAL_SPREAD


	def build_interval_table(
	    shares_by_cat: Dict[str, float],
	    exprimes_total: int,
	    residuals: Dict[str, Dict[str, float]],
	    band_label: str,
	) -> pd.DataFrame:
	    """Build the baseline/min/max table for every displayed category.

	    For each category the predicted share is shifted by the residual bounds
	    of the chosen band, clipped to [0, 1] and converted to counts using
	    ``exprimes_total``.  Columns follow ``INTERVAL_OUTPUT_COLUMNS``.
	    """

	    def _to_count(fraction: float) -> int:
	        return int(round(fraction * exprimes_total))

	    records = []
	    for key in ordered_categories():
	        point = float(shares_by_cat.get(key, 0.0))
	        resid_lo, resid_hi = get_interval_bounds(residuals, key, band_label)
	        lo_share = float(np.clip(point + resid_lo, 0.0, 1.0))
	        hi_share = float(np.clip(point + resid_hi, 0.0, 1.0))
	        lo_count = _to_count(lo_share)
	        hi_count = _to_count(hi_share)
	        # Keep min <= max even if the residual bounds are inverted.
	        if lo_count > hi_count:
	            lo_count, hi_count = hi_count, lo_count
	            lo_share, hi_share = hi_share, lo_share
	        record = {
	            "categorie": DISPLAY_CATEGORY_LABELS.get(key, key),
	            "baseline_%": round(point * 100, 1),
	            "min_%": round(lo_share * 100, 1),
	            "max_%": round(hi_share * 100, 1),
	            "baseline": _to_count(point),
	            "min": lo_count,
	            "max": hi_count,
	        }
	        records.append(record)
	    return pd.DataFrame(records, columns=INTERVAL_OUTPUT_COLUMNS)


	def build_interval_chart(
	    df: pd.DataFrame,
	    *,
	    value_col: str = "baseline",
	    low_col: str = "min",
	    high_col: str = "max",
	    color_map: Dict[str, str] | None = None,
	    ylabel: str = "Nombre d'électeurs",
	):
	    """Draw a bar chart with asymmetric error bars from an interval table.

	    Args:
	        df: Table holding a ``categorie`` column plus the three value columns.
	        value_col: Column with the bar heights.
	        low_col: Column with the lower bound of each bar.
	        high_col: Column with the upper bound of each bar.
	        color_map: Optional label -> colour mapping; unknown labels and a
	            missing map fall back to ``#3b82f6``.
	        ylabel: Label of the y axis.

	    Returns:
	        The ``matplotlib.pyplot`` module with the new figure as current
	        figure, or ``None`` when matplotlib is unavailable, ``df`` is empty
	        or a required column is missing.
	    """
	    try:
	        import matplotlib.pyplot as plt
	    except Exception:
	        return None
	    # Missing bound columns used to raise KeyError; treat them like a missing
	    # value column and skip the chart.
	    required = {"categorie", value_col, low_col, high_col}
	    if df.empty or not required.issubset(df.columns):
	        return None
	    labels = df["categorie"].astype(str).tolist()
	    values = df[value_col].astype(float).to_numpy()
	    low_vals = df[low_col].astype(float).to_numpy()
	    high_vals = df[high_col].astype(float).to_numpy()
	    # Error bars are distances from the bar top and may not be negative.
	    lower_err = np.maximum(0.0, values - low_vals)
	    upper_err = np.maximum(0.0, high_vals - values)
	    yerr = np.vstack([lower_err, upper_err])
	    colors = [color_map.get(label, "#3b82f6") for label in labels] if color_map else "#3b82f6"
	    plt.figure(figsize=(6, 3))
	    plt.bar(labels, values, color=colors, yerr=yerr, capsize=4)
	    plt.xticks(rotation=30, ha="right")
	    plt.ylabel(ylabel)
	    plt.tight_layout()
	    return plt


	def blend_with_type_history(
	    preds_by_cat: Dict[str, float],
	    row: pd.Series,
	    target_type: str,
	) -> Dict[str, float]:
	    """Blend predicted shares with the previous result of the same election type.

	    Args:
	        preds_by_cat: Predicted share per category.
	        row: Feature row; the ``prev_share_type_lag1_<category>`` entries are
	            read as historical shares.
	        target_type: Election type, looked up (lower-cased) in
	            ``TYPE_HISTORY_BLEND`` to get the base blending weight.

	    Returns:
	        ``preds_by_cat`` unchanged when the type has no positive weight or no
	        historical share is available; otherwise a new dict over
	        ``CANDIDATE_CATEGORIES`` renormalised to sum to 1 when the total is
	        positive.
	    """
	    base_weight = TYPE_HISTORY_BLEND.get(str(target_type).lower(), 0.0)
	    if base_weight <= 0:
	        return preds_by_cat

	    # Only categories with a usable historical value are recorded.
	    hist_vals: Dict[str, float] = {}
	    available = 0
	    for cat in CANDIDATE_CATEGORIES:
	        val = row.get(f"prev_share_type_lag1_{cat}")
	        if val is not None and not pd.isna(val):
	            hist_vals[cat] = float(val)
	            available += 1
	    if available == 0:
	        return preds_by_cat

	    # The fewer categories have history, the less the history is trusted.
	    weight = base_weight * (available / len(CANDIDATE_CATEGORIES))
	    blended: Dict[str, float] = {}
	    for cat in CANDIDATE_CATEGORIES:
	        base = float(preds_by_cat.get(cat, 0.0))
	        hist = hist_vals.get(cat)
	        blended[cat] = base if hist is None else (1 - weight) * base + weight * hist

	    total = sum(blended.values())
	    if total > 0:
	        blended = {cat: value / total for cat, value in blended.items()}
	    return blended


	def _normalize_override_pct(value: float \| None) -> float \| None:
	if value is None:
	return None
	try:
	val = float(value)
	except (TypeError, ValueError):
	return None
	if np.isnan(val):
	return None
	return float(np.clip(val, 0.0, 100.0))


	def apply_share_overrides(
	    preds_by_cat: Dict[str, float],
	    overrides_pct: Dict[str, float] | None,
	    ordered: list[str],
	) -> Dict[str, float]:
	    """Force some category shares and redistribute the rest.

	    Args:
	        preds_by_cat: Predicted share per category.
	        overrides_pct: Category -> forced share in percent.  Entries for
	            categories outside ``ordered`` or with unusable values are
	            ignored.
	        ordered: Categories that make up the result.

	    Returns:
	        ``preds_by_cat`` itself when no usable override exists.  Otherwise a
	        new dict over ``ordered``: if the forced shares reach 100 % they are
	        rescaled to sum to 1 and all other categories get 0; else the
	        remaining share is split among the other categories in proportion to
	        their predictions (evenly when those predictions sum to 0).
	    """
	    if not overrides_pct:
	        return preds_by_cat

	    fixed: Dict[str, float] = {}
	    for cat, pct in overrides_pct.items():
	        if cat not in ordered:
	            continue
	        norm = _normalize_override_pct(pct)
	        if norm is None:
	            continue
	        fixed[cat] = norm / 100.0
	    if not fixed:
	        return preds_by_cat

	    fixed_sum = sum(fixed.values())
	    if fixed_sum >= 1.0:
	        # fixed_sum is >= 1 here, so the division is always safe.
	        return {cat: float(fixed.get(cat, 0.0) / fixed_sum) for cat in ordered}

	    remaining = 1.0 - fixed_sum
	    residual_cats = [cat for cat in ordered if cat not in fixed]
	    base_sum = sum(float(preds_by_cat.get(cat, 0.0)) for cat in residual_cats)
	    if base_sum <= 0 and residual_cats:
	        per_cat = remaining / len(residual_cats)
	        base_alloc = {cat: per_cat for cat in residual_cats}
	    else:
	        # Reached with base_sum > 0, or with no residual category at all (in
	        # which case the comprehension is empty and nothing is divided).
	        base_alloc = {
	            cat: (float(preds_by_cat.get(cat, 0.0)) / base_sum) * remaining
	            for cat in residual_cats
	        }

	    merged = {cat: float(base_alloc.get(cat, 0.0)) for cat in ordered}
	    for cat, val in fixed.items():
	        merged[cat] = float(val)
	    return merged


	def apply_transfers(
	    counts: Dict[str, int],
	    total_inscrits: int,
	    transfers: list[Tuple[str, str, float]],
	) -> Dict[str, int]:
	    """Move voters between categories and return the updated counts.

	    Each transfer is ``(source, target, percent)`` where the percentage is
	    taken of ``total_inscrits``.  Transfers with a non-positive percentage,
	    identical source and target, or rounding to zero voters are skipped.
	    A transfer never moves more voters than the source currently holds.
	    The input mapping is not modified.
	    """
	    result: Dict[str, int] = {}
	    for key, value in counts.items():
	        result[key] = int(value)

	    for origin, destination, pct in transfers:
	        if pct <= 0 or origin == destination:
	            continue
	        wanted = int(round(total_inscrits * float(pct) / 100.0))
	        if wanted <= 0:
	            continue
	        stock = max(0, int(result.get(origin, 0)))
	        shifted = min(stock, wanted)
	        result[origin] = stock - shifted
	        result[destination] = int(result.get(destination, 0)) + shifted
	    return result


	def build_simulation_table(
	    baseline: Dict[str, int],
	    updated: Dict[str, int],
	) -> pd.DataFrame:
	    """Compare counts before and after a transfer simulation.

	    One row is produced per key of ``PREDICTION_CATEGORY_ORDER``; missing
	    keys count as 0.  Columns follow ``SIM_OUTPUT_COLUMNS``.
	    """

	    def _line(key: str) -> dict:
	        before = int(baseline.get(key, 0))
	        after = int(updated.get(key, 0))
	        return {
	            "categorie": DISPLAY_CATEGORY_LABELS.get(key, key),
	            "baseline": before,
	            "apres_transfert": after,
	            "delta": after - before,
	        }

	    lines = [_line(key) for key in PREDICTION_CATEGORY_ORDER]
	    return pd.DataFrame(lines, columns=SIM_OUTPUT_COLUMNS)


	def load_geojson_features(geo_dir: Path = GEO_DIR) -> list[dict]:
	    """Collect the GeoJSON features found in a directory.

	    Args:
	        geo_dir: Directory scanned (non-recursively) for ``*.geojson`` and
	            ``*.json`` files.

	    Returns:
	        The concatenated ``features`` entries of every readable file whose
	        top level is a JSON object, ``*.geojson`` files first, each group in
	        sorted path order.  Unreadable or malformed files are skipped.
	    """
	    if not geo_dir.exists():
	        return []
	    # The patterns need the "*" wildcard: a bare ".geojson" only matches a
	    # file literally named ".geojson".
	    paths = sorted(geo_dir.glob("*.geojson")) + sorted(geo_dir.glob("*.json"))
	    features: list[dict] = []
	    for path in paths:
	        try:
	            payload = json.loads(path.read_text(encoding="utf-8"))
	        except (OSError, ValueError) as exc:
	            LOGGER.warning("Skipping unreadable GeoJSON file %s: %s", path, exc)
	            continue
	        if not isinstance(payload, dict):
	            continue
	        items = payload.get("features", [])
	        # Guard against "features": null or other non-list payloads.
	        if isinstance(items, list):
	            features.extend(item for item in items if isinstance(item, dict))
	    return features


	def extract_bureau_number(label: str | None) -> int | None:
	    """Return the first run of digits found in a label as an int.

	    Returns ``None`` for an empty/``None`` label or when the label contains
	    no digit.
	    """
	    if not label:
	        return None
	    match = re.search(r"(\d+)", str(label))
	    if not match:
	        return None
	    try:
	        return int(match.group(1))
	    except ValueError:
	        return None


	def match_bureau_code(commune_code: str, bureau_num: int, available_codes: set[str]) -> str:
	    """Build the bureau code for a number, preferring a known spelling.

	    The number is zero-padded to four digits.  The hyphenated form
	    ``<commune>-<number>`` is returned when it is a known code; in every
	    other case the compact form ``<commune><number>`` is returned, whether
	    or not it is known.
	    """
	    suffix = str(bureau_num).zfill(4)
	    hyphenated = f"{commune_code}-{suffix}"
	    compact = f"{commune_code}{suffix}"
	    if hyphenated in available_codes:
	        return hyphenated
	    return compact


	def _iter_coords(geom: dict) -> list[Tuple[float, float]]:
	coords = []
	geom_type = geom.get("type")
	if geom_type == "Polygon":
	for ring in geom.get("coordinates", []):
	coords.extend([(lon, lat) for lon, lat in ring])
	elif geom_type == "MultiPolygon":
	for polygon in geom.get("coordinates", []):
	for ring in polygon:
	coords.extend([(lon, lat) for lon, lat in ring])
	return coords


	def geojson_bounds(features: list[dict]) -> Tuple[Tuple[float, float], Tuple[float, float]] | None:
	    """Compute the bounding box of a list of GeoJSON features.

	    Returns:
	        ``((min_lat, min_lon), (max_lat, max_lon))``, or ``None`` when no
	        coordinate could be extracted.
	    """
	    lons = []
	    lats = []
	    for feature in features:
	        # "geometry" may be null for unlocated features.
	        geom = feature.get("geometry") or {}
	        for lon, lat in _iter_coords(geom):
	            lons.append(lon)
	            lats.append(lat)
	    if not lons or not lats:
	        return None
	    return (min(lats), min(lons)), (max(lats), max(lons))


	def build_prediction_table_from_counts(counts_by_cat: Dict[str, int]) -> pd.DataFrame:
	    """Turn per-category counts into the prediction table.

	    Rows list the displayed candidate categories first, then blank votes,
	    spoiled votes and abstention; missing counts are shown as 0.  Columns
	    follow ``PREDICTION_OUTPUT_COLUMNS``.
	    """
	    records = [
	        {
	            "categorie": DISPLAY_CATEGORY_LABELS.get(key, key),
	            "nombre": int(counts_by_cat.get(key, 0)),
	        }
	        for key in ordered_categories()
	    ]
	    records += [
	        {
	            "categorie": DISPLAY_CATEGORY_LABELS[key],
	            "nombre": int(counts_by_cat.get(key, 0)),
	        }
	        for key in ("blancs", "nuls", "abstention")
	    ]
	    return pd.DataFrame(records, columns=PREDICTION_OUTPUT_COLUMNS)


	def chart_base64_from_df(
	    df: pd.DataFrame,
	    value_col: str,
	    ylabel: str,
	    color_map: Dict[str, str],
	) -> str | None:
	    """Render a horizontal bar chart and return it as a base64-encoded PNG.

	    Args:
	        df: Table with a ``categorie`` column and the value column.
	        value_col: Column holding the bar lengths; non-numeric values count
	            as 0.
	        ylabel: Text used as the label of the value (x) axis.
	        color_map: Label -> colour mapping; unknown labels use ``#3b82f6``.

	    Returns:
	        ASCII base64 text of the PNG, or ``None`` when matplotlib is
	        unavailable, ``df`` is empty or a required column is missing.
	    """
	    try:
	        import matplotlib.pyplot as plt
	    except Exception:
	        return None
	    if df.empty or value_col not in df.columns or "categorie" not in df.columns:
	        return None
	    labels = df["categorie"].astype(str).tolist()
	    values = pd.to_numeric(df[value_col], errors="coerce").fillna(0).tolist()
	    colors = [color_map.get(label, "#3b82f6") for label in labels]
	    fig, ax = plt.subplots(figsize=(4.5, 3.2))
	    try:
	        ax.barh(labels, values, color=colors)
	        # First row of the table at the top of the chart.
	        ax.invert_yaxis()
	        ax.set_xlabel(ylabel)
	        ax.tick_params(axis="y", labelsize=8)
	        fig.tight_layout()
	        buf = io.BytesIO()
	        fig.savefig(buf, format="png", dpi=150)
	    finally:
	        # Always release the figure, even when drawing or saving fails.
	        plt.close(fig)
	    return base64.b64encode(buf.getvalue()).decode("ascii")


	def build_map_popup_html(
	    bureau_label: str,
	    table_df: pd.DataFrame,
	    chart_b64: str | None,
	    meta: str | None,
	) -> str:
	    """Assemble the HTML shown in a map popup.

	    Args:
	        bureau_label: Popup title; HTML-escaped.
	        table_df: Table rendered with ``DataFrame.to_html``.
	        chart_b64: Optional base64 PNG embedded as a data URI.
	        meta: Optional line shown under the title; HTML-escaped.

	    Returns:
	        A single ``<div>`` holding title, optional meta line, table and
	        optional image, in that order.
	    """
	    title_html = f"<strong>{escape(bureau_label)}</strong>"
	    meta_html = f"<div style='margin:4px 0;'>{escape(meta)}</div>" if meta else ""
	    table_html = table_df.to_html(index=False, border=0)
	    img_html = ""
	    if chart_b64:
	        img_html = (
	            "<div style='margin-top:6px;'>"
	            f"<img src='data:image/png;base64,{chart_b64}' style='width:320px;height:auto;'/>"
	            "</div>"
	        )
	    return f"<div style='font-size:12px;'>{title_html}{meta_html}{table_html}{img_html}</div>"


	def build_map_legend_html() -> str:
	    """Return the HTML legend displayed with the bureau map.

	    One colour swatch per displayed category, followed by the two special
	    swatches (near-tie and missing data) and a note about abstention.
	    """

	    def _swatch(css_color: str, caption_html: str) -> str:
	        # caption_html is inserted verbatim; escape it before calling.
	        return (
	            "<span style='display:inline-flex;align-items:center;margin-right:10px;'>"
	            f"<span style='width:12px;height:12px;background:{css_color};display:inline-block;margin-right:6px;border:1px solid #111827;'></span>"
	            f"{caption_html}</span>"
	        )

	    entries = [
	        _swatch(
	            DISPLAY_CATEGORY_COLORS.get(key, "#9ca3af"),
	            escape(DISPLAY_CATEGORY_LABELS.get(key, key)),
	        )
	        for key in DISPLAY_CATEGORY_ORDER
	    ]
	    entries.append(_swatch("#ffffff", "écart gauche/droite ≤ 10%"))
	    entries.append(_swatch("#9ca3af", "données indisponibles"))
	    entries.append("<span style='font-size:12px;color:#6b7280;'>abstention non utilisée pour la couleur</span>")
	    return "<div style='margin-bottom:8px;'>" + " ".join(entries) + "</div>"


	def build_bureau_map_html(
	    backend: "PredictorBackend",
	    target_type: str,
	    target_year: int,
	) -> str:
	    """Render the polling-station map as embeddable HTML.

	    Each GeoJSON feature is matched to a bureau code through the first
	    number found in its ``name`` property, filled with the colour of the
	    predicted leading category (white when left and right are within
	    ``NEUTRAL_MARGIN_SHARE``, grey when no prediction is available) and
	    given a popup with the prediction table and chart.

	    Args:
	        backend: Prediction backend; ``available_bureaux()`` and
	            ``predict_bureau_details()`` are called on it.
	        target_type: Election type to predict.
	        target_year: Election year to predict.

	    Returns:
	        The Folium map HTML, or a short ``<p>`` message when Folium is not
	        installed, no GeoJSON feature is found or no bounds can be computed.
	    """
	    try:
	        import folium
	    except Exception:
	        return "<p>Folium n'est pas disponible. Installe-le via requirements.txt.</p>"

	    features = load_geojson_features()
	    if not features:
	        return "<p>Aucune geojson trouvée dans data/geo.</p>"

	    bounds = geojson_bounds(features)
	    if bounds is None:
	        return "<p>Impossible de calculer l'emprise de la carte.</p>"
	    (min_lat, min_lon), (max_lat, max_lon) = bounds
	    center = [(min_lat + max_lat) / 2, (min_lon + max_lon) / 2]
	    fmap = folium.Map(location=center, zoom_start=13, tiles="cartodbpositron")

	    available_codes = set(backend.available_bureaux())
	    for feature in features:
	        # GeoJSON allows "properties": null, so fall back to an empty dict
	        # (same handling as for "geometry" in geojson_bounds).
	        props = feature.get("properties") or {}
	        label = props.get("name") or "Bureau"
	        bureau_num = extract_bureau_number(label)
	        if bureau_num is None:
	            code_bv = None
	        else:
	            code_bv = match_bureau_code(COMMUNE_CODE_SETE, bureau_num, available_codes)

	        fill_color = "#9ca3af"
	        popup_html = None
	        if code_bv is not None:
	            details, _, meta = backend.predict_bureau_details(code_bv, target_type, target_year)
	            if details is not None:
	                shares = details["shares_by_cat"]
	                left_share = float(shares.get("gauche_dure", 0.0) + shares.get("gauche_modere", 0.0))
	                right_share = float(shares.get("droite_dure", 0.0) + shares.get("droite_modere", 0.0))
	                if abs(left_share - right_share) <= NEUTRAL_MARGIN_SHARE:
	                    fill_color = "#ffffff"
	                else:
	                    winner = max(shares, key=shares.get)
	                    fill_color = DISPLAY_CATEGORY_COLORS.get(winner, fill_color)

	                table_df = build_prediction_table_from_counts(details["counts"])
	                chart_b64 = chart_base64_from_df(
	                    table_df,
	                    value_col="nombre",
	                    ylabel="Nombre d'electeurs",
	                    color_map=DISPLAY_LABEL_COLORS,
	                )
	                popup_html = build_map_popup_html(str(label), table_df, chart_b64, meta)

	        # Bind the colour as a default argument so each feature keeps its own
	        # value instead of the last one assigned in the loop.
	        def _style(_: dict, color=fill_color):
	            return {
	                "fillColor": color,
	                "color": "#111827",
	                "weight": 1,
	                "fillOpacity": 0.6,
	            }

	        geo = folium.GeoJson(feature, style_function=_style)
	        if popup_html:
	            geo.add_child(folium.Popup(popup_html, max_width=450))
	        geo.add_child(folium.Tooltip(str(label)))
	        geo.add_to(fmap)

	    fmap.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]])
	    return fmap._repr_html_()


	def _project_rate(
	series: pd.Series,
	years: pd.Series,
	target_year: int,
	*,
	min_points_trend: int = 3,
	clamp_to_observed: bool = True,
	) -> float \| None:
	df = pd.DataFrame(
	{
	"value": pd.to_numeric(series, errors="coerce"),
	"year": pd.to_numeric(years, errors="coerce"),
	}
	).dropna()
	if df.empty:
	return None
	values = df["value"].to_numpy()
	years_arr = df["year"].to_numpy()
	if len(set(years_arr)) >= min_points_trend and len(df) >= min_points_trend:
	with warnings.catch_warnings():
	warnings.simplefilter("ignore", category=NP_RANK_WARNING)
	try:
	slope, intercept = np.polyfit(years_arr, values, 1)
	projected = slope * target_year + intercept
	except Exception:
	projected = values[-1]
	else:
	projected = values[-1]
	if clamp_to_observed and len(values):
	projected = min(max(projected, float(np.nanmin(values))), float(np.nanmax(values)))
	return float(min(1.0, max(0.0, projected)))


	def _allocate_counts(shares: np.ndarray, total: int) -> np.ndarray:
	if total <= 0 or shares.size == 0:
	return np.zeros_like(shares, dtype=int)
	shares = np.clip(shares, 0, None)
	if shares.sum() == 0:
	return np.zeros_like(shares, dtype=int)
	shares = shares / shares.sum()
	raw = shares * total
	floors = np.floor(raw)
	remainder = int(total - floors.sum())
	if remainder > 0:
	order = np.argsort(-(raw - floors))
	for idx in order[:remainder]:
	floors[idx] += 1
	return floors.astype(int)


	def load_bureau_event_stats(commune_code: str) -> pd.DataFrame:
	    """Load per-bureau turnout, blank and spoiled-vote statistics per election.

	    The candidate files are tried in order; the first one containing rows
	    for the commune with at least one blank or spoiled-vote figure wins,
	    otherwise the first one with any row for the commune is used.

	    Args:
	        commune_code: Commune code; matched against ``code_commune`` when
	            present, else against the prefix of ``code_bv``.

	    Returns:
	        One row per bureau and election with the raw counts and the rates
	        ``turnout_pct``, ``blancs_pct`` and ``nuls_pct`` (all relative to
	        ``inscrits``).  Empty when no file provides data for the commune.
	    """
	    candidates = [
	        Path("data/processed/elections_blocs.parquet"),
	        Path("data/processed/elections_blocs.csv"),
	        Path("data/interim/elections_long.parquet"),
	        Path("data/interim/elections_long.csv"),
	    ]
	    # Expected column -> alternative name found in some of the files.
	    key_aliases = {"type_scrutin": "election_type", "annee": "election_year", "tour": "round"}
	    best = pd.DataFrame()
	    for path in candidates:
	        if not path.exists():
	            continue
	        df = pd.read_parquet(path) if path.suffix == ".parquet" else pd.read_csv(path, sep=";")
	        if df.empty or "code_bv" not in df.columns:
	            continue
	        # Guarantee every grouping key exists so the aggregation below never
	        # has to deal with absent columns.
	        for col, alias in key_aliases.items():
	            if col not in df.columns:
	                df[col] = df[alias] if alias in df.columns else np.nan
	        if "date_scrutin" in df.columns:
	            df["date_scrutin"] = pd.to_datetime(df["date_scrutin"], errors="coerce")
	        else:
	            df["date_scrutin"] = pd.NaT
	        for col in ["inscrits", "votants", "blancs", "nuls"]:
	            if col in df.columns:
	                df[col] = pd.to_numeric(df[col], errors="coerce")
	            else:
	                df[col] = np.nan
	        if "code_commune" in df.columns:
	            df["code_commune"] = df["code_commune"].astype(str)
	            df = df[df["code_commune"] == str(commune_code)]
	        else:
	            df = df[df["code_bv"].astype(str).str.startswith(str(commune_code))]
	        df = df.dropna(subset=["code_bv"])
	        if df.empty:
	            continue
	        has_blancs = df["blancs"].notna().any() or df["nuls"].notna().any()
	        if has_blancs:
	            best = df
	            break
	        if best.empty:
	            best = df
	    if best.empty:
	        return best

	    group_cols = ["code_bv", "type_scrutin", "annee", "tour", "date_scrutin"]
	    # dropna=False: by default groupby discards every row whose key holds a
	    # null, which emptied the result whenever e.g. the dates were missing.
	    agg = best.groupby(group_cols, as_index=False, dropna=False).agg(
	        inscrits=("inscrits", "max"),
	        votants=("votants", "max"),
	        blancs=("blancs", "max"),
	        nuls=("nuls", "max"),
	    )
	    agg = agg.sort_values("date_scrutin")
	    agg["election_type"] = agg["type_scrutin"].astype("string").str.strip().str.lower()
	    agg["election_year"] = pd.to_numeric(agg["annee"], errors="coerce")
	    agg["round"] = pd.to_numeric(agg["tour"], errors="coerce").fillna(1).astype(int)
	    # Avoid division by zero: bureaux with 0 registered voters get NaN rates.
	    base_inscrits = agg["inscrits"].replace(0, np.nan)
	    agg["turnout_pct"] = agg["votants"] / base_inscrits
	    agg["blancs_pct"] = agg["blancs"] / base_inscrits
	    agg["nuls_pct"] = agg["nuls"] / base_inscrits
	    return agg[
	        [
	            "code_bv",
	            "election_type",
	            "election_year",
	            "round",
	            "date_scrutin",
	            "inscrits",
	            "votants",
	            "blancs",
	            "nuls",
	            "turnout_pct",
	            "blancs_pct",
	            "nuls_pct",
	        ]
	    ]


	def load_commune_event_stats(commune_code: str) -> pd.DataFrame:
	    """Load commune-level turnout, blank and spoiled-vote statistics.

	    The first non-empty candidate file is used.

	    Args:
	        commune_code: Commune code matched against ``code_commune``.

	    Returns:
	        One row per record of the commune with the raw counts and the rates
	        ``turnout_pct``, ``blancs_pct`` and ``nuls_pct`` (existing rate
	        columns are kept, missing ones are derived from ``inscrits``).
	        Empty when no file is available, the file has no ``code_commune``
	        column, or the commune is absent.
	    """
	    candidates = [
	        Path("data/processed/commune_event_stats.parquet"),
	        Path("data/processed/commune_event_stats.csv"),
	    ]
	    df = pd.DataFrame()
	    for path in candidates:
	        if not path.exists():
	            continue
	        df = pd.read_parquet(path) if path.suffix == ".parquet" else pd.read_csv(path, sep=";")
	        if not df.empty:
	            break
	    if df.empty:
	        return df
	    if "code_commune" not in df.columns:
	        return pd.DataFrame()

	    # Filter first and work on a copy: assigning columns to a filtered slice
	    # triggers pandas' chained-assignment warning and may not stick.
	    df = df[df["code_commune"].astype(str) == str(commune_code)].copy()
	    if df.empty:
	        return df
	    df["code_commune"] = df["code_commune"].astype(str)

	    # Expected column -> alternative name; absent columns become all-null so
	    # the conversions below cannot fail on a missing key.
	    key_aliases = {"type_scrutin": "election_type", "annee": "election_year", "tour": "round"}
	    for col, alias in key_aliases.items():
	        if col not in df.columns:
	            df[col] = df[alias] if alias in df.columns else np.nan
	    if "date_scrutin" in df.columns:
	        df["date_scrutin"] = pd.to_datetime(df["date_scrutin"], errors="coerce")
	    else:
	        df["date_scrutin"] = pd.NaT
	    for col in ["inscrits", "votants", "blancs", "nuls"]:
	        if col in df.columns:
	            df[col] = pd.to_numeric(df[col], errors="coerce")
	        else:
	            df[col] = np.nan

	    # Avoid division by zero: 0 registered voters gives NaN rates.
	    base_inscrits = df["inscrits"].replace(0, np.nan)
	    if "turnout_pct" not in df.columns:
	        df["turnout_pct"] = df["votants"] / base_inscrits
	    if "blancs_pct" not in df.columns:
	        df["blancs_pct"] = df["blancs"] / base_inscrits
	    if "nuls_pct" not in df.columns:
	        df["nuls_pct"] = df["nuls"] / base_inscrits
	    df["election_type"] = df["type_scrutin"].astype("string").str.strip().str.lower()
	    df["election_year"] = pd.to_numeric(df["annee"], errors="coerce")
	    df["round"] = pd.to_numeric(df["tour"], errors="coerce").fillna(1).astype(int)
	    return df[
	        [
	            "code_commune",
	            "election_type",
	            "election_year",
	            "round",
	            "date_scrutin",
	            "inscrits",
	            "votants",
	            "blancs",
	            "nuls",
	            "turnout_pct",
	            "blancs_pct",
	            "nuls_pct",
	        ]
	    ]


	def format_backend_label(backend_kind: str) -> str:
	    """Return the user-facing name of the data backend.

	    ``"postgres"`` maps to ``"PostgreSQL"``; anything else is reported as
	    local files.
	    """
	    if backend_kind == "postgres":
	        return "PostgreSQL"
	    return "fichiers locaux"


	def format_election_type_label(election_type: str) -> str:
	    """Return the display label of an election type.

	    Uses ``ELECTION_TYPE_LABELS`` when it has a non-empty entry for the
	    type; otherwise underscores become spaces and the text is title-cased.
	    """
	    known = ELECTION_TYPE_LABELS.get(election_type)
	    return known or str(election_type).replace("_", " ").title()


	def format_election_label(
	    election_type: str,
	    election_year: int,
	    round_num: int,
	    date_scrutin: pd.Timestamp | None = None,
	) -> str:
	    """Build the label of an election for selection widgets.

	    Returns ``"<type label> <year> - Tour <round>"``, followed by the ISO
	    date in parentheses when a non-null ``date_scrutin`` is given.
	    """
	    base = f"{format_election_type_label(election_type)} {election_year} - Tour {round_num}"
	    if date_scrutin is None or pd.isna(date_scrutin):
	        return base
	    date_value = pd.to_datetime(date_scrutin).date().isoformat()
	    return f"{base} ({date_value})"


	def format_election_key(election_type: str, election_year: int, round_num: int) -> str:
	    """Serialise (type, year, round) into one key string.

	    The three fields are joined with ``ELECTION_KEY_SEP``;
	    ``parse_election_key`` is the inverse operation.
	    """
	    return "{kind}{sep}{year}{sep}{rnd}".format(
	        kind=election_type,
	        year=election_year,
	        rnd=round_num,
	        sep=ELECTION_KEY_SEP,
	    )


	def parse_election_key(key: str) -> Tuple[str, int, int]:
	    """Split an election key back into (type, year, round).

	    Raises:
	        ValueError: If the key does not hold exactly three fields separated
	            by ``ELECTION_KEY_SEP``, or year/round are not integers.
	    """
	    pieces = key.split(ELECTION_KEY_SEP)
	    if len(pieces) == 3:
	        kind, year_text, round_text = pieces
	        return kind, int(year_text), int(round_text)
	    raise ValueError(f"Clé d'élection invalide: {key!r}")


	def format_bureau_label(code_bv: str, bureau_label: str | None) -> str:
	    """Build the display label of a polling station.

	    Returns ``"<label> (<code>)"`` when a non-empty label different from
	    the code is available, else ``"Bureau <suffix> (<code>)"`` where the
	    suffix is the part of the code after its last hyphen (the whole code
	    when it has none).
	    """
	    code = str(code_bv)
	    if bureau_label is not None and not pd.isna(bureau_label):
	        label = str(bureau_label).strip()
	        if label and label != code:
	            return f"{label} ({code})"
	    suffix = code.rsplit("-", 1)[-1]
	    return f"Bureau {suffix} ({code})"


	def build_bureau_choices(history: pd.DataFrame) -> list[tuple[str, str]]:
	    """Return ``(display label, code_bv)`` pairs for every bureau in the history.

	    Pairs are sorted by code.  When the history has a ``bureau_label``
	    column, the first non-null label of each code is used; otherwise a
	    generic label is built from the code alone.
	    """
	    if history.empty:
	        return []
	    if "bureau_label" not in history.columns:
	        unique_codes = sorted(history["code_bv"].dropna().unique().tolist())
	        return [(format_bureau_label(code, None), code) for code in unique_codes]

	    pairs = history[["code_bv", "bureau_label"]].dropna(subset=["code_bv"]).drop_duplicates()
	    first_labels = (
	        pairs.sort_values("code_bv")
	        .groupby("code_bv", as_index=False)["bureau_label"]
	        .first()
	    )
	    choices = []
	    for code, label in zip(first_labels["code_bv"], first_labels["bureau_label"]):
	        choices.append((format_bureau_label(code, label), code))
	    return choices


	def build_history_choices(history: pd.DataFrame) -> list[tuple[str, str]]:
	    """Return ``(display label, election key)`` pairs for the past elections.

	    Elections are identified by (type, year, round), labelled with their
	    earliest known date and sorted by year, then type, then round.
	    """
	    if history.empty:
	        return []
	    keys = ["election_type", "election_year", "round"]
	    events = history[keys + ["date_scrutin"]].dropna(subset=keys).drop_duplicates()
	    events = events.groupby(keys, as_index=False).agg(date_scrutin=("date_scrutin", "min"))
	    events = events.sort_values(["election_year", "election_type", "round"])

	    choices = []
	    columns = (events[name] for name in keys + ["date_scrutin"])
	    for kind, year, round_num, when in zip(*columns):
	        year_int = int(year)
	        round_int = int(round_num)
	        label = format_election_label(kind, year_int, round_int, when)
	        choices.append((label, format_election_key(kind, year_int, round_int)))
	    return choices


	def clean_history_frame(history: pd.DataFrame) -> pd.DataFrame:
	    """Return a normalised copy of the history table.

	    Text keys are stripped (type and category also lower-cased), years and
	    rounds become integers (missing rounds default to 1), dates are parsed,
	    share-like columns are clipped to [0, 1], rows lacking a key are
	    dropped and only ``CANDIDATE_CATEGORIES`` rows are kept.  An empty
	    input is returned as is.
	    """
	    if history.empty:
	        return history
	    frame = history.copy()

	    def _stripped(column: str) -> pd.Series:
	        return frame[column].astype("string").str.strip()

	    frame["code_bv"] = _stripped("code_bv")
	    frame["election_type"] = _stripped("election_type").str.lower()
	    frame["category"] = _stripped("category").str.lower()
	    if "bureau_label" in frame.columns:
	        frame["bureau_label"] = _stripped("bureau_label")

	    frame["election_year"] = pd.to_numeric(frame["election_year"], errors="coerce")
	    frame["round"] = pd.to_numeric(frame["round"], errors="coerce").fillna(1)
	    frame["date_scrutin"] = pd.to_datetime(frame["date_scrutin"], errors="coerce")
	    for column in ("share", "share_nat", "turnout_pct"):
	        if column not in frame.columns:
	            continue
	        numeric = pd.to_numeric(frame[column], errors="coerce")
	        frame[column] = numeric.clip(lower=0, upper=1)

	    required = ["code_bv", "election_type", "election_year", "round", "category"]
	    frame = frame.dropna(subset=required)
	    frame["election_year"] = frame["election_year"].astype(int)
	    frame["round"] = frame["round"].astype(int)
	    keep = frame["category"].isin(CANDIDATE_CATEGORIES)
	    return frame[keep]


	def prepare_history_table(history_slice: pd.DataFrame) -> pd.DataFrame:
	    """Summarise a history slice as one score per displayed category.

	    Shares are summed per category, categories without data get 0, totals
	    are clipped to [0, 1] and reported as percentages rounded to one
	    decimal.  Columns follow ``HISTORY_OUTPUT_COLUMNS``.
	    """
	    if history_slice.empty:
	        return pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
	    totals = history_slice.groupby("category", as_index=False).agg(share=("share", "sum"))
	    skeleton = pd.DataFrame({"category": ordered_categories()})
	    table = skeleton.merge(totals, on="category", how="left")
	    shares = pd.to_numeric(table["share"], errors="coerce").fillna(0).clip(lower=0, upper=1)
	    table["share"] = shares
	    table["score_%"] = (shares * 100).round(1)
	    table["categorie"] = table["category"].map(DISPLAY_CATEGORY_LABELS).fillna(table["category"])
	    return table[HISTORY_OUTPUT_COLUMNS]


	def format_history_meta(history_slice: pd.DataFrame) -> str:
	    """Describe a history slice with its election date and turnout.

	    Returns:
	        ``"Date du scrutin : <ISO date> | Participation : <x.x>%"`` using
	        the first non-null value of each column; parts without data (or
	        whose column is absent) are omitted, and an empty slice gives an
	        empty string.
	    """
	    if history_slice.empty:
	        return ""
	    parts = []
	    if "date_scrutin" in history_slice.columns:
	        dates = history_slice["date_scrutin"].dropna()
	        if not dates.empty:
	            date_value = pd.to_datetime(dates.iloc[0]).date().isoformat()
	            parts.append(f"Date du scrutin : {date_value}")
	    if "turnout_pct" in history_slice.columns:
	        turnout_vals = pd.to_numeric(history_slice["turnout_pct"], errors="coerce").dropna()
	        if not turnout_vals.empty:
	            parts.append(f"Participation : {turnout_vals.iloc[0] * 100:.1f}%")
	    # Plain pipe separator: the previous literal " \| " put a stray backslash
	    # into the text shown to the user.
	    return " | ".join(parts)


	def _code_bv_full(commune_code: str, bureau_code: str) -> str:
	bureau_code = str(bureau_code).zfill(4)
	return f"{commune_code}-{bureau_code}"


	def load_history_from_db(commune_code: str) -> pd.DataFrame:
	    """Read the local election history of a commune from the database.

	    Local results are joined with their bureau, commune, election and
	    category, plus the national result of the same election and category
	    when one exists.  Percentages from the database are divided by 100.

	    Raises:
	        RuntimeError: If the query returns no row for the commune.
	    """
	    statement = sa.text(
	        """
	select cm.insee_code as commune_code,
	b.bureau_code,
	b.bureau_label,
	e.election_type,
	e.election_year,
	coalesce(e.round, 1) as round,
	e.date as date_scrutin,
	c.name as category,
	rl.share_pct,
	rl.turnout_pct,
	rn.share_pct as share_nat
	from results_local rl
	join bureaux b on rl.bureau_id = b.id
	join communes cm on b.commune_id = cm.id
	join elections e on rl.election_id = e.id
	join categories c on rl.category_id = c.id
	left join results_national rn on rn.election_id = e.id and rn.category_id = rl.category_id
	where cm.insee_code = :commune
	"""
	    )
	    frame = pd.read_sql(statement, get_engine(), params={"commune": commune_code})
	    if frame.empty:
	        raise RuntimeError("Aucune donnée dans la base pour la commune demandée.")

	    def _full_code(record: pd.Series) -> str:
	        return _code_bv_full(record["commune_code"], record["bureau_code"])

	    frame["code_bv"] = frame.apply(_full_code, axis=1)
	    frame["date_scrutin"] = pd.to_datetime(frame["date_scrutin"])
	    # Percent columns -> fractions; "share" comes from share_pct.
	    for target, source in (("share", "share_pct"), ("share_nat", "share_nat"), ("turnout_pct", "turnout_pct")):
	        frame[target] = pd.to_numeric(frame[source], errors="coerce") / 100
	    frame["election_year"] = pd.to_numeric(frame["election_year"], errors="coerce")
	    frame["round"] = pd.to_numeric(frame["round"], errors="coerce").fillna(1).astype(int)

	    output_columns = [
	        "commune_code",
	        "code_bv",
	        "bureau_label",
	        "election_type",
	        "election_year",
	        "round",
	        "date_scrutin",
	        "category",
	        "share",
	        "share_nat",
	        "turnout_pct",
	    ]
	    return frame[output_columns]


	def load_history_from_files(commune_code: str) -> pd.DataFrame:
	    """Rebuild the election history of a commune from local files.

	    Long-format results are expanded by category, aggregated per event and
	    merged with the national reference computed from the same aggregate.
	    The result has the same columns as the database loader, with
	    ``bureau_label`` left empty.

	    Raises:
	        RuntimeError: If the aggregated data has no ``share`` column.
	    """
	    long_results = load_elections_long(
	        Path("data/interim/elections_long.parquet"),
	        commune_code=commune_code,
	    )
	    bloc_mapping = load_mapping(Path("data/mapping_candidats_blocs.csv"))
	    per_event = aggregate_by_event(expand_by_category(long_results, bloc_mapping))
	    national = compute_national_reference(per_event)

	    in_commune = per_event["commune_code"] == commune_code
	    local = per_event[in_commune].copy()
	    merge_keys = ["election_type", "election_year", "round", "category"]
	    local = local.merge(national, on=merge_keys, how="left")
	    # "share" is expected to come out of the aggregation step.
	    if "share" not in local.columns:
	        raise RuntimeError("Colonne share absente du dataset local (fallback fichiers).")
	    local["bureau_label"] = None
	    # Make sure the optional columns exist (None when absent).
	    local["share_nat"] = local.get("share_nat")
	    local["turnout_pct"] = local.get("turnout_pct")

	    renamed = local.rename(columns={"annee": "election_year", "tour": "round"})
	    output_columns = [
	        "commune_code",
	        "code_bv",
	        "bureau_label",
	        "election_type",
	        "election_year",
	        "round",
	        "date_scrutin",
	        "category",
	        "share",
	        "share_nat",
	        "turnout_pct",
	    ]
	    return renamed[output_columns]


	def references_from_history(history: pd.DataFrame, target_year: int) -> Dict[str, Dict[Tuple[str, str], float]]:
	    """Extract reference shares from the elections before ``target_year``.

	    Returns:
	        A dict with two lookups keyed by ``(code_bv, category)``:
	        ``"leg"`` holds the share of the latest legislative election and
	        ``"mun2020"`` the share of the 2020 municipal election.
	    """
	    past = history[history["election_year"] < target_year].copy()

	    def _latest_share(mask: pd.Series) -> dict:
	        chronological = past[mask].sort_values("date_scrutin")
	        return chronological.groupby(["code_bv", "category"])["share"].last().to_dict()

	    is_legislative = past["election_type"] == "legislatives"
	    is_municipal_2020 = (past["election_type"] == "municipales") & (past["election_year"] == 2020)
	    return {
	        "leg": _latest_share(is_legislative),
	        "mun2020": _latest_share(is_municipal_2020),
	    }


	def build_features_from_history(history: pd.DataFrame, target_type: str, target_year: int) -> pd.DataFrame:
	    """Build one feature row per bureau for a future election.

	    Only elections before ``target_year`` are used.  For each bureau and
	    candidate category the row holds the latest share and deviation to the
	    national share (over all elections and over elections of the target
	    type) and the change between the last two shares; the latest turnout
	    is added per bureau.  Missing values are NaN.  Returns an empty frame
	    when there is no earlier election.
	    """
	    past = history[history["election_year"] < target_year].copy()
	    if past.empty:
	        return pd.DataFrame()
	    past = past.sort_values("date_scrutin")
	    past["dev_to_nat"] = past["share"] - past["share_nat"]
	    same_type = past[past["election_type"] == target_type]

	    pair_keys = ["code_bv", "category"]
	    any_pairs = past.groupby(pair_keys)
	    type_pairs = same_type.groupby(pair_keys)

	    def _last_swing(values: pd.Series) -> float:
	        # Difference between the two most recent shares, if there are two.
	        return values.iloc[-1] - values.iloc[-2] if len(values) >= 2 else np.nan

	    # (column prefix, lookup) in the column order of the output rows.
	    category_lookups = (
	        ("prev_share_any_lag1_", any_pairs["share"].last()),
	        ("prev_share_type_lag1_", type_pairs["share"].last()),
	        ("prev_dev_to_national_any_lag1_", any_pairs["dev_to_nat"].last()),
	        ("prev_dev_to_national_type_lag1_", type_pairs["dev_to_nat"].last()),
	        ("swing_any_", any_pairs["share"].apply(_last_swing).rename("swing_any")),
	    )
	    turnout_any = past.groupby("code_bv")["turnout_pct"].last()
	    turnout_type = same_type.groupby("code_bv")["turnout_pct"].last()

	    rows = []
	    for bureau in sorted(past["code_bv"].dropna().unique()):
	        row = {
	            "commune_code": str(bureau).split("-")[0],
	            "code_bv": bureau,
	            "election_type": target_type,
	            "election_year": target_year,
	            "round": 1,
	            "date_scrutin": f"{target_year}-01-01",
	            "prev_turnout_any_lag1": turnout_any.get(bureau, np.nan),
	            "prev_turnout_same_type_lag1": turnout_type.get(bureau, np.nan),
	        }
	        for category in CANDIDATE_CATEGORIES:
	            for prefix, lookup in category_lookups:
	                row[f"{prefix}{category}"] = lookup.get((bureau, category), np.nan)
	        rows.append(row)
	    return pd.DataFrame.from_records(rows)


	def load_model() -> Path:
	    """Return the path of the serialized model that should be loaded.

	    Resolution order:

	    1. the model named by the ``"name"`` key of ``best_model.json`` in
	       ``MODEL_DIR``, when that ``<name>.joblib`` file exists;
	    2. ``hist_gradient_boosting.joblib``;
	    3. the first ``*.joblib`` file of ``MODEL_DIR`` in sorted order.

	    An unreadable or malformed ``best_model.json`` is logged and ignored so
	    that the fallbacks still apply.

	    Raises:
	        FileNotFoundError: when no ``*.joblib`` file exists in ``MODEL_DIR``.
	    """
	    best_path = MODEL_DIR / "best_model.json"
	    if best_path.exists():
	        try:
	            payload = json.loads(best_path.read_text(encoding="utf-8"))
	        except (OSError, ValueError) as exc:
	            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
	            LOGGER.warning("best_model.json illisible (%s) -> modèle par défaut.", exc)
	        else:
	            # A valid JSON document is not necessarily an object.
	            name = payload.get("name") if isinstance(payload, dict) else None
	            if name:
	                candidate = MODEL_DIR / f"{name}.joblib"
	                if candidate.exists():
	                    return candidate

	    default_model = MODEL_DIR / "hist_gradient_boosting.joblib"
	    if default_model.exists():
	        return default_model

	    joblibs = sorted(MODEL_DIR.glob("*.joblib"))
	    if not joblibs:
	        raise FileNotFoundError("Aucun modèle trouvé dans models/. Lancez src/model/train.py.")
	    return joblibs[0]


	def load_feature_columns(path: Path, df: pd.DataFrame) -> list[str]:
	    """Return the model feature column names.

	    When ``path`` exists its JSON content is returned as-is. Otherwise the
	    names are derived from ``df`` by dropping the identifier columns.
	    """
	    if not path.exists():
	        identifiers = {"commune_code", "code_bv", "election_type", "election_year", "round", "date_scrutin"}
	        return [name for name in df.columns if name not in identifiers]
	    return json.loads(path.read_text())


	def format_delta(value) -> str:
	    """Format a numeric delta with an explicit sign, rounded to one decimal.

	    Args:
	        value: A number, or a missing value (``None``, NaN of any float
	            type, ``pd.NA``).

	    Returns:
	        ``"N/A"`` for missing values, otherwise the rounded value prefixed
	        with ``"+"`` when it is zero or positive (e.g. ``"+1.3"``, ``"-2.3"``).
	    """
	    if value is None:
	        return "N/A"
	    # pd.isna also catches NaN scalars that are not Python floats
	    # (e.g. np.float32) and pd.NA; the isinstance guard ignores array results.
	    missing = pd.isna(value)
	    if isinstance(missing, (bool, np.bool_)) and missing:
	        return "N/A"
	    rounded = round(value, 1)
	    if rounded == 0:
	        # Avoid rendering small negatives as "-0.0".
	        rounded = abs(rounded)
	    sign = "+" if rounded >= 0 else ""
	    return f"{sign}{rounded}"


	class PredictorBackend:
	    """Serve per-bureau election forecasts for one commune.

	    On construction the backend loads the election history (PostgreSQL
	    first, local files as a fallback), the bureau-level and commune-level
	    event statistics, median fallback rates and the fitted model.
	    ``predict_bureau_details`` converts the model's category shares into
	    vote counts; ``predict_bureau`` renders them as a display table.
	    """

	    # Rate columns for which median fallbacks are computed.
	    _RATE_COLUMNS = ("turnout_pct", "blancs_pct", "nuls_pct")

	    def __init__(self, commune_code: str = COMMUNE_CODE_SETE):
	        """Load history, statistics, fallback rates and the model for ``commune_code``."""
	        self.commune_code = commune_code
	        self.backend = "local"
	        try:
	            self.history = load_history_from_db(commune_code)
	            self.backend = "postgres"
	            LOGGER.info("Backend PostgreSQL chargé (%s lignes)", len(self.history))
	        except Exception as exc:
	            # Deliberate best-effort: any database failure falls back to files.
	            LOGGER.warning("PostgreSQL indisponible (%s) -> fallback fichiers.", exc)
	            self.history = load_history_from_files(commune_code)
	            self.backend = "files"
	            LOGGER.info("Backend fichiers chargé (%s lignes)", len(self.history))
	        self.history = clean_history_frame(self.history)
	        self.event_stats = load_bureau_event_stats(commune_code)
	        self.commune_stats = load_commune_event_stats(commune_code)
	        self.default_rates = {}
	        self.default_rates_by_type: dict[str, dict[str, float]] = {}
	        # Commune-level statistics are preferred; bureau-level ones are the fallback.
	        stats = self.commune_stats if not self.commune_stats.empty else self.event_stats
	        if not stats.empty:
	            stats = self._prefer_first_round(stats)
	            self.default_rates = self._median_rates(stats)
	            if "election_type" in stats.columns:
	                for etype, group in stats.groupby("election_type"):
	                    self.default_rates_by_type[str(etype)] = self._median_rates(group)
	        self.model_path = load_model()
	        self.model = joblib.load(self.model_path)
	        # Per-target reference cache; not read by any method of this class.
	        self.refs_cache: Dict[Tuple[str, int], Dict[str, Dict[Tuple[str, str], float]]] = {}

	    @staticmethod
	    def _prefer_first_round(frame: pd.DataFrame) -> pd.DataFrame:
	        """Return the round-1 rows of ``frame`` when there are any, else ``frame`` itself."""
	        if frame.empty or "round" not in frame.columns:
	            return frame
	        first_round = frame[frame["round"] == 1]
	        return frame if first_round.empty else first_round

	    @classmethod
	    def _median_rates(cls, frame: pd.DataFrame) -> dict[str, float]:
	        """Return the NaN-skipping median of each rate column of ``frame``."""
	        return {col: float(frame[col].median(skipna=True)) for col in cls._RATE_COLUMNS}

	    def available_bureaux(self) -> list[str]:
	        """Return the sorted distinct bureau codes present in the history."""
	        codes = self.history["code_bv"].dropna().unique()
	        return sorted(codes.tolist())

	    def available_targets(self) -> list[Tuple[str, int]]:
	        """Return ``(election_type, year)`` pairs from the history plus ``DEFAULT_TARGETS``.

	        The result is sorted by year, then by election type.
	        """
	        targets = set(DEFAULT_TARGETS)
	        for row in self.history.itertuples(index=False):
	            try:
	                year = int(row.election_year)  # type: ignore
	            except Exception:
	                # Rows whose year cannot be converted are skipped.
	                continue
	            targets.add((row.election_type, year))
	        return sorted(targets, key=lambda pair: (pair[1], pair[0]))

	    def _get_features_and_refs(self, target_type: str, target_year: int) -> Tuple[pd.DataFrame, Dict[str, Dict[Tuple[str, str], float]]]:
	        """Return the cached ``(features, references)`` pair for a target election.

	        NOTE(review): the cache is the module-level ``FEATURE_CACHE`` keyed by
	        ``(target_type, target_year)`` only, so it is shared by every instance
	        regardless of ``commune_code`` — confirm this is intended.
	        """
	        key = (target_type, target_year)
	        try:
	            return FEATURE_CACHE[key]
	        except KeyError:
	            feature_df = build_features_from_history(self.history, target_type, target_year)
	            refs = references_from_history(self.history, target_year)
	            FEATURE_CACHE[key] = (feature_df, refs)
	            return FEATURE_CACHE[key]

	    def _resolve_inscrits(self, code_bv: str, inscrits_override: float | None) -> float | None:
	        """Return the registered-voter count to use for ``code_bv``.

	        A strictly positive ``inscrits_override`` wins; otherwise the last
	        numeric ``inscrits`` value of the bureau (in ``date_scrutin`` order)
	        is used. Returns ``None`` when neither is available.
	        """
	        bureau_stats = self.event_stats[self.event_stats["code_bv"] == code_bv].sort_values("date_scrutin")
	        if inscrits_override is not None:
	            try:
	                value = float(inscrits_override)
	            except (TypeError, ValueError):
	                value = None
	            if value is not None and value > 0:
	                return value
	        if not bureau_stats.empty:
	            serie = pd.to_numeric(bureau_stats["inscrits"], errors="coerce").dropna()
	            if not serie.empty:
	                return float(serie.iloc[-1])
	        return None

	    def _pick_rate(self, col: str, target_type: str, target_year: int) -> float:
	        """Project the rate column ``col`` to ``target_year``.

	        The series handed to ``_project_rate`` is chosen in this order:
	        bureau-level rows of ``target_type``; commune-level rows (of
	        ``target_type`` when present, else all); all bureau-level rows.
	        Round-1 rows are preferred whenever some exist. When no projection
	        is possible the per-type median is returned, falling back to the
	        global median and finally to ``0.0``.

	        NOTE(review): the bureau-level table is not filtered on a single
	        ``code_bv``, so the projection uses the rows of every bureau —
	        confirm this is intended.
	        """
	        fallback = self.default_rates.get(col, 0.0)
	        fallback = 0.0 if fallback is None or np.isnan(fallback) else float(fallback)
	        type_fallback = self.default_rates_by_type.get(target_type, {}).get(col)
	        if type_fallback is None or np.isnan(type_fallback):
	            type_fallback = fallback

	        bureau_level = self._prefer_first_round(self.event_stats)
	        series = None
	        years = None
	        if (
	            not bureau_level.empty
	            and col in bureau_level.columns
	            and "election_type" in bureau_level.columns
	        ):
	            if target_type in bureau_level["election_type"].values:
	                mask = bureau_level["election_type"] == target_type
	                series = bureau_level.loc[mask, col]
	                years = bureau_level.loc[mask, "election_year"]

	        if series is None and not self.commune_stats.empty and col in self.commune_stats.columns:
	            commune_level = self._prefer_first_round(self.commune_stats)
	            if target_type in commune_level["election_type"].values:
	                mask = commune_level["election_type"] == target_type
	                series = commune_level.loc[mask, col]
	                years = commune_level.loc[mask, "election_year"]
	            else:
	                series = commune_level[col]
	                years = commune_level["election_year"]

	        if series is None:
	            if bureau_level.empty or col not in bureau_level.columns:
	                return type_fallback
	            series = bureau_level[col]
	            years = bureau_level["election_year"]

	        rate = _project_rate(series, years, target_year)
	        if rate is None or np.isnan(rate):
	            return type_fallback
	        return float(rate)

	    def predict_bureau_details(
	        self,
	        code_bv: str,
	        target_type: str,
	        target_year: int,
	        inscrits_override: float | None = None,
	        share_overrides: Dict[str, float] | None = None,
	        abstention_override_pct: float | None = None,
	        blancs_override_pct: float | None = None,
	        nuls_override_pct: float | None = None,
	    ) -> Tuple[Dict[str, object] | None, str, str]:
	        """Predict category shares and vote counts for one bureau.

	        Args:
	            code_bv: Bureau code to predict for.
	            target_type: Election type of the target election.
	            target_year: Year of the target election.
	            inscrits_override: Registered-voter count to use instead of the
	                last known one (ignored unless strictly positive).
	            share_overrides: Per-category overrides forwarded to
	                ``apply_share_overrides``.
	            abstention_override_pct: Abstention in percent of registered
	                voters; replaces the projected turnout.
	            blancs_override_pct: Blank votes in percent of registered voters.
	            nuls_override_pct: Spoiled votes in percent of registered voters.

	        Returns:
	            ``(details, label, meta)``. On success ``details`` holds the keys
	            ``shares_by_cat``, ``share_vec``, ``ordered``, ``counts`` and
	            ``totals``, ``label`` is the formatted backend label and ``meta``
	            a one-line summary of the totals. On failure ``details`` is
	            ``None``, ``label`` carries the error message and ``meta`` is empty.
	        """
	        feature_df, _ = self._get_features_and_refs(target_type, target_year)
	        if feature_df.empty:
	            return None, "Données insuffisantes", ""
	        row = feature_df[feature_df["code_bv"] == code_bv].copy()
	        if row.empty:
	            return None, "Bureau non trouvé dans l'historique.", ""

	        feature_cols = load_feature_columns(FEATURE_COLS_PATH, feature_df)
	        # Features expected by the model but absent from the row are fed as NaN.
	        for col in feature_cols:
	            if col not in row.columns:
	                row[col] = np.nan
	        preds = np.clip(self.model.predict(row[feature_cols]), 0, 1)
	        # Renormalise so the clipped shares sum to 1 (all-zero rows stay zero).
	        sums = preds.sum(axis=1, keepdims=True)
	        sums[sums == 0] = 1
	        preds_share = (preds / sums).flatten()

	        preds_by_cat = {cat: float(preds_share[idx]) for idx, cat in enumerate(CANDIDATE_CATEGORIES)}
	        preds_by_cat = blend_with_type_history(preds_by_cat, row.iloc[0], target_type)
	        ordered = ordered_categories()
	        preds_by_cat = apply_share_overrides(preds_by_cat, share_overrides, ordered)
	        share_vec = np.array([preds_by_cat.get(cat, 0.0) for cat in ordered], dtype=float)

	        inscrits_used = self._resolve_inscrits(code_bv, inscrits_override)
	        if inscrits_used is None:
	            return None, "Inscrits indisponibles pour ce bureau.", ""

	        turnout_rate = self._pick_rate("turnout_pct", target_type, target_year)
	        blancs_rate = self._pick_rate("blancs_pct", target_type, target_year)
	        nuls_rate = self._pick_rate("nuls_pct", target_type, target_year)
	        abstention_override = _normalize_override_pct(abstention_override_pct)
	        if abstention_override is not None:
	            turnout_rate = float(np.clip(1.0 - (abstention_override / 100.0), 0.0, 1.0))
	        blancs_override = _normalize_override_pct(blancs_override_pct)
	        if blancs_override is not None:
	            blancs_rate = float(blancs_override / 100.0)
	        nuls_override = _normalize_override_pct(nuls_override_pct)
	        if nuls_override is not None:
	            nuls_rate = float(nuls_override / 100.0)
	        # Blank + spoiled ballots cannot exceed turnout: shrink them proportionally.
	        invalid_rate = blancs_rate + nuls_rate
	        if invalid_rate > turnout_rate and invalid_rate > 0:
	            scale = turnout_rate / invalid_rate
	            blancs_rate *= scale
	            nuls_rate *= scale

	        inscrits_total = int(round(inscrits_used))
	        votants_total = int(round(inscrits_total * turnout_rate))
	        blancs_total = int(round(inscrits_total * blancs_rate))
	        nuls_total = int(round(inscrits_total * nuls_rate))
	        # Rounding may push the integer counts over the voter total again.
	        invalid_total = blancs_total + nuls_total
	        if invalid_total > votants_total and invalid_total > 0:
	            scale = votants_total / invalid_total
	            blancs_total = int(round(blancs_total * scale))
	            nuls_total = int(round(nuls_total * scale))
	        exprimes_total = max(0, votants_total - blancs_total - nuls_total)
	        abstention_total = max(0, inscrits_total - votants_total)

	        bloc_counts = _allocate_counts(share_vec, exprimes_total)
	        counts_by_cat = {cat: int(count) for cat, count in zip(ordered, bloc_counts)}
	        counts_by_cat.update(
	            {
	                "blancs": int(blancs_total),
	                "nuls": int(nuls_total),
	                "abstention": int(abstention_total),
	            }
	        )
	        backend_label = format_backend_label(self.backend)
	        meta = (
	            f"Inscrits utilisés : {inscrits_total} | Votants : {votants_total} | "
	            f"Blancs : {blancs_total} | Nuls : {nuls_total} | Abstentions : {abstention_total}"
	        )
	        details = {
	            "shares_by_cat": preds_by_cat,
	            "share_vec": share_vec,
	            "ordered": ordered,
	            "counts": counts_by_cat,
	            "totals": {
	                "inscrits": inscrits_total,
	                "votants": votants_total,
	                "blancs": blancs_total,
	                "nuls": nuls_total,
	                "abstention": abstention_total,
	                "exprimes": exprimes_total,
	            },
	        }
	        return details, backend_label, meta

	    def predict_bureau(
	        self,
	        code_bv: str,
	        target_type: str,
	        target_year: int,
	        inscrits_override: float | None = None,
	        share_overrides: Dict[str, float] | None = None,
	        abstention_override_pct: float | None = None,
	        blancs_override_pct: float | None = None,
	        nuls_override_pct: float | None = None,
	    ) -> Tuple[pd.DataFrame, str, str]:
	        """Return the prediction of ``predict_bureau_details`` as a display table.

	        The table has the columns ``categorie`` (display label) and
	        ``nombre`` (vote count): one row per ordered category followed by
	        blank votes, spoiled votes and abstention. When no prediction is
	        available an empty frame is returned together with the error message.
	        """
	        details, backend_label, meta = self.predict_bureau_details(
	            code_bv,
	            target_type,
	            target_year,
	            inscrits_override,
	            share_overrides,
	            abstention_override_pct,
	            blancs_override_pct,
	            nuls_override_pct,
	        )
	        if details is None:
	            return pd.DataFrame(), backend_label, ""
	        counts_by_cat = details["counts"]
	        rows = [
	            {
	                "categorie": DISPLAY_CATEGORY_LABELS.get(cat, cat),
	                "nombre": int(counts_by_cat.get(cat, 0)),
	            }
	            for cat in details["ordered"]
	        ]
	        rows.extend(
	            {
	                "categorie": DISPLAY_CATEGORY_LABELS[extra],
	                "nombre": int(counts_by_cat.get(extra, 0)),
	            }
	            for extra in ("blancs", "nuls", "abstention")
	        )
	        return pd.DataFrame(rows), backend_label, meta


	def build_bar_chart(
	    df: pd.DataFrame,
	    value_col: str,
	    *,
	    color: str = "#3b82f6",
	    color_map: Dict[str, str] | None = None,
	    category_col: str = "categorie",
	    ylabel: str = "Score (%)",
	):
	    """Draw a bar chart of ``df[value_col]`` labelled by ``df[category_col]``.

	    Args:
	        df: Data to plot, one bar per row.
	        value_col: Column holding the bar heights.
	        color: Bar colour used when ``color_map`` has no entry for a label.
	        color_map: Optional mapping from category label to bar colour.
	        category_col: Column holding the bar labels.
	        ylabel: Label of the y axis.

	    Returns:
	        The ``matplotlib.pyplot`` module with the new figure as current
	        figure, or ``None`` when matplotlib cannot be imported, ``df`` is
	        empty, or one of the two columns is missing.
	    """
	    try:
	        import matplotlib.pyplot as plt
	    except Exception:
	        # Plotting is optional: callers get no chart instead of a crash.
	        return None
	    if df.empty or value_col not in df.columns or category_col not in df.columns:
	        # Without labels there is nothing meaningful to draw; bail out
	        # before a figure is opened.
	        return None
	    labels = df[category_col].astype(str).tolist()
	    colors = [color_map.get(label, color) for label in labels] if color_map else color
	    # NOTE(review): every call opens a new pyplot figure that is never
	    # closed here; closing is left to whoever consumes the returned module.
	    plt.figure(figsize=(6, 3))
	    plt.bar(labels, df[value_col], color=colors)
	    plt.xticks(rotation=30, ha="right")
	    plt.ylabel(ylabel)
	    plt.tight_layout()
	    return plt


	def create_interface() -> gr.Blocks:
	# Build the Gradio application and return it without launching it.
	# The UI has four tabs ("Prévisions", "Historique", "Carte", "Stratégie");
	# the callbacks defined below close over the single PredictorBackend
	# instance and the lookup tables computed here.
	# NOTE(review): the `\|` sequences in this block (annotations and f-strings)
	# look like escaping artifacts of `|` — confirm against the original file.
	backend = PredictorBackend()
	# build_bureau_choices yields (label, code) pairs; keep both directions
	# so callbacks can translate dropdown labels to codes and back.
	bureau_choices = build_bureau_choices(backend.history)
	bureau_labels = [label for label, _ in bureau_choices]
	bureau_map = {label: value for label, value in bureau_choices}
	bureau_label_by_code = {value: label for label, value in bureau_choices}
	targets = backend.available_targets()
	# Target labels are "<type> <year>"; _parse_target_label splits them again.
	target_labels = [f"{t} {y}" for t, y in targets]
	history_choices = build_history_choices(backend.history)
	history_labels = [label for label, _ in history_choices]
	history_map = {label: value for label, value in history_choices}
	# Default target: municipales 2026 when offered, else the last target.
	if ("municipales", 2026) in targets:
	default_target = "municipales 2026"
	elif targets:
	default_target = f"{targets[-1][0]} {targets[-1][1]}"
	else:
	default_target = "municipales 2026"
	default_bv = bureau_labels[0] if bureau_labels else None
	default_history = history_labels[-1] if history_labels else None
	backend_label = format_backend_label(backend.backend)
	# Residual payload is only trusted when it is a dict; otherwise the
	# interval features fall back to empty residuals / "inconnu".
	residual_payload = load_residual_intervals()
	residuals = residual_payload.get("residuals", {}) if isinstance(residual_payload, dict) else {}
	residual_model = residual_payload.get("model", "inconnu") if isinstance(residual_payload, dict) else "inconnu"
	interval_choices = list(INTERVAL_BANDS.keys()) or ["80% (p10-p90)"]
	interval_default = interval_choices[0]
	bloc_labels = [DISPLAY_CATEGORY_LABELS.get(cat, cat) for cat in ordered_categories()]

	with gr.Blocks(title="Prévision Municipales — Ville de Sète") as demo:
	gr.Markdown(
	"""
	# Prévision Municipales — Ville de Sète
	Choisissez un bureau de vote et une élection cible.
	Le modèle estime un volume par catégorie politique, ainsi que les abstentions, blancs et nuls.

	Auteur : [Stéphane Manet](https://manet-conseil.fr) - [Linkedin](https://www.linkedin.com/in/stephanemanet) - [GitHub](https://github.com/stephmnt)
	"""
	)
	with gr.Tabs():
	# --- Tab 1: model predictions for one bureau, with optional manual overrides.
	with gr.Tab("Prévisions"):
	with gr.Row():
	bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
	target_dd = gr.Dropdown(choices=target_labels, value=default_target, label="Élection cible (type année)")
	inscrits_in = gr.Number(value=None, label="Inscrits (optionnel)", precision=0)
	# One Number input per ordered category, keyed by category key.
	override_inputs: Dict[str, gr.Number] = {}
	with gr.Accordion("Imputation manuelle (optionnel)", open=False):
	gr.Markdown("Abstention / blancs / nuls en % des inscrits.")
	with gr.Row():
	# NOTE(review): abstention is pre-filled with 40 while the other overrides
	# default to None, so this tab presumably always overrides the projected
	# turnout unless the field is cleared — confirm this is intended.
	abstention_in = gr.Number(value=40, label="Abstention (% inscrits)", precision=1)
	blancs_in = gr.Number(value=None, label="Blancs (% inscrits)", precision=1)
	nuls_in = gr.Number(value=None, label="Nuls (% inscrits)", precision=1)
	gr.Markdown("Nuances politiques en % des exprimés (laisser vide pour garder le modèle).")
	cats = ordered_categories()
	# The category inputs are split over two rows: first four, then the rest.
	with gr.Row():
	for cat in cats[:4]:
	override_inputs[cat] = gr.Number(
	value=None,
	label=DISPLAY_CATEGORY_LABELS.get(cat, cat),
	precision=1,
	)
	with gr.Row():
	for cat in cats[4:]:
	override_inputs[cat] = gr.Number(
	value=None,
	label=DISPLAY_CATEGORY_LABELS.get(cat, cat),
	precision=1,
	)
	predict_btn = gr.Button("Prédire")
	source_box = gr.Markdown(value=f"Source des données : {backend_label}")
	output_df = gr.Dataframe(
	headers=PREDICTION_OUTPUT_COLUMNS,
	label="Prédictions (nombres)",
	)
	chart = gr.Plot()

	# --- Tab 2: past results for one bureau and one election (no model involved).
	with gr.Tab("Historique"):
	gr.Markdown(
	"""
	Consultation des résultats passés (sans machine learning).
	Sélectionnez un bureau et une élection pour afficher l'histogramme des parts par tendance politique.
	"""
	)
	with gr.Row():
	history_bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
	history_election_dd = gr.Dropdown(
	choices=history_labels,
	value=default_history,
	label="Élection (type année tour)",
	)
	history_btn = gr.Button("Afficher l'historique")
	history_source = gr.Markdown(value=f"Source des données : {backend_label}")
	history_df = gr.Dataframe(headers=HISTORY_OUTPUT_COLUMNS, label="Résultats historiques")
	history_chart = gr.Plot()
	history_meta = gr.Markdown()

	# --- Tab 3: map of the bureaux, rendered as HTML on demand.
	with gr.Tab("Carte"):
	gr.Markdown(
	"""
	Carte des bureaux de vote de Sète.
	Cliquez sur un polygone pour afficher la prédiction (table + graphique).
	"""
	)
	map_legend = gr.HTML(value=build_map_legend_html())
	with gr.Row():
	map_target_dd = gr.Dropdown(
	choices=target_labels,
	value=default_target,
	label="Élection cible (type année)",
	)
	map_btn = gr.Button("Afficher la carte")
	map_html = gr.HTML(value="<p>Cliquez sur 'Afficher la carte' pour charger la carte.</p>")

	# --- Tab 4: uncertainty intervals and vote-transfer simulator.
	with gr.Tab("Stratégie"):
	gr.Markdown(
	"""
	Analyse stratégique par bureau : intervalles d'incertitude issus des résidus CV,
	puis simulateur de transferts pour estimer des bascules potentielles.
	"""
	)
	with gr.Row():
	strategy_bureau_dd = gr.Dropdown(choices=bureau_labels, value=default_bv, label="Bureau de vote")
	strategy_target_dd = gr.Dropdown(
	choices=target_labels,
	value=default_target,
	label="Élection cible (type année)",
	)
	strategy_inscrits_in = gr.Number(value=None, label="Inscrits (optionnel)", precision=0)
	interval_dd = gr.Dropdown(
	choices=interval_choices,
	value=interval_default,
	label="Intervalle CV",
	)
	strategy_btn = gr.Button("Analyser l'incertitude")
	interval_source = gr.Markdown(
	value=(
	f"Intervalle CV basé sur le modèle : {residual_model}"
	if residuals
	else "Intervalle CV indisponible (fallback ±3%)."
	)
	)
	interval_df = gr.Dataframe(
	headers=INTERVAL_OUTPUT_COLUMNS,
	label="Plage empirique par bloc",
	)
	interval_chart = gr.Plot()

	gr.Markdown("### Simulateur de transferts (points d'inscrits)")
	with gr.Row():
	target_bloc_dd = gr.Dropdown(choices=bloc_labels, value=bloc_labels[0] if bloc_labels else None, label="Bloc cible")
	with gr.Row():
	source_1_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["abstention"], label="Source 1")
	target_1_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["droite_dure"], label="Cible 1")
	delta_1 = gr.Slider(minimum=0, maximum=10, value=3, step=0.1, label="Delta 1 (points %)")
	with gr.Row():
	source_2_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["droite_modere"], label="Source 2")
	target_2_dd = gr.Dropdown(choices=TRANSFER_CATEGORY_LABELS, value=DISPLAY_CATEGORY_LABELS["gauche_modere"], label="Cible 2")
	delta_2 = gr.Slider(minimum=0, maximum=10, value=3, step=0.1, label="Delta 2 (points %)")
	simulate_btn = gr.Button("Simuler les transferts")
	sim_df = gr.Dataframe(headers=SIM_OUTPUT_COLUMNS, label="Simulation par catégorie")
	sim_chart = gr.Plot()
	opportunity_df = gr.Dataframe(headers=OPPORTUNITY_OUTPUT_COLUMNS, label="Bureaux à potentiel (trié)")

	# Callback of the "Prédire" button.
	# Returns (table, source text, plot); on invalid input the table is empty,
	# the text carries the error and the plot is None.
	# *cat_overrides receives the per-category Number values in
	# ordered_categories() order (see predict_inputs below).
	def _predict(
	bv_label: str,
	target_label: str,
	inscrits_override: float \| None,
	abstention_override: float \| None,
	blancs_override: float \| None,
	nuls_override: float \| None,
	*cat_overrides: float,
	):
	if not bv_label or not target_label:
	return pd.DataFrame(), "Entrée invalide", None
	code_bv = bureau_map.get(bv_label)
	if not code_bv:
	return pd.DataFrame(), "Bureau invalide", None
	# NOTE(review): same parsing and same fallback as _parse_target_label below.
	try:
	parts = target_label.split()
	target_type, target_year = parts[0].lower(), int(parts[1])
	except Exception:
	target_type, target_year = "municipales", 2026
	# Keep only the category overrides that normalise to a value.
	share_overrides: Dict[str, float] = {}
	for cat, value in zip(ordered_categories(), cat_overrides):
	norm = _normalize_override_pct(value)
	if norm is None:
	continue
	share_overrides[cat] = norm
	df, backend_label, meta = backend.predict_bureau(
	code_bv,
	target_type,
	target_year,
	inscrits_override,
	share_overrides=share_overrides if share_overrides else None,
	abstention_override_pct=abstention_override,
	blancs_override_pct=blancs_override,
	nuls_override_pct=nuls_override,
	)
	plot = build_bar_chart(
	df,
	value_col="nombre",
	ylabel="Nombre d'électeurs",
	color_map=DISPLAY_LABEL_COLORS,
	)
	meta_label = f" \| {meta}" if meta else ""
	return df, f"Source des données : {backend_label}{meta_label}", plot

	# Split a "<type> <year>" label into (type, year); any parsing failure
	# falls back to ("municipales", 2026).
	def _parse_target_label(target_label: str) -> Tuple[str, int]:
	try:
	parts = target_label.split()
	return parts[0].lower(), int(parts[1])
	except Exception:
	return "municipales", 2026

	# Callback of the "Afficher la carte" button: returns the map HTML.
	def _map(target_label: str):
	if not target_label:
	return "<p>Élection invalide.</p>"
	target_type, target_year = _parse_target_label(target_label)
	return build_bureau_map_html(backend, target_type, target_year)

	# Callback of the "Afficher l'historique" button.
	# Returns (table, source text, plot, meta text); every failure path
	# returns an empty table with the HISTORY_OUTPUT_COLUMNS headers.
	def _history(bv_label: str, election_label: str):
	if not bv_label or not election_label:
	empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
	return empty, "Entrée invalide", None, ""
	code_bv = bureau_map.get(bv_label)
	if not code_bv:
	empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
	return empty, "Bureau invalide", None, ""
	election_key = history_map.get(election_label)
	if not election_key:
	empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
	return empty, "Élection invalide", None, ""
	try:
	election_type, election_year, round_num = parse_election_key(election_key)
	except Exception:
	empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
	return empty, "Élection invalide", None, ""
	# Exact match on bureau, election type, year and round.
	history_slice = backend.history[
	(backend.history["code_bv"] == code_bv)
	& (backend.history["election_type"] == election_type)
	& (backend.history["election_year"] == election_year)
	& (backend.history["round"] == round_num)
	].copy()
	if history_slice.empty:
	empty = pd.DataFrame(columns=HISTORY_OUTPUT_COLUMNS)
	return empty, f"Source des données : {backend_label}", None, "Aucun résultat pour ce bureau."
	table = prepare_history_table(history_slice)
	plot = build_bar_chart(
	table,
	value_col="score_%",
	ylabel="Score (%)",
	color_map=DISPLAY_LABEL_COLORS,
	)
	meta = format_history_meta(history_slice)
	return table, f"Source des données : {backend_label}", plot, meta

	# Callback of the "Analyser l'incertitude" button.
	# Returns (interval table, source text, plot). No rate or share override
	# is passed to the backend here, only the optional registered-voter count.
	def _strategy_interval(
	bv_label: str,
	target_label: str,
	inscrits_override: float \| None,
	band_label: str,
	):
	empty = pd.DataFrame(columns=INTERVAL_OUTPUT_COLUMNS)
	if not bv_label or not target_label:
	return empty, "Entrée invalide", None
	code_bv = bureau_map.get(bv_label)
	if not code_bv:
	return empty, "Bureau invalide", None
	target_type, target_year = _parse_target_label(target_label)
	details, backend_label_local, _ = backend.predict_bureau_details(
	code_bv,
	target_type,
	target_year,
	inscrits_override,
	)
	if details is None:
	# On failure the second element of the backend result is the error message.
	return empty, backend_label_local, None
	totals = details["totals"]
	exprimes_total = int(totals.get("exprimes", 0))
	table = build_interval_table(
	details["shares_by_cat"],
	exprimes_total,
	residuals,
	band_label,
	)
	plot = build_interval_chart(table, color_map=DISPLAY_LABEL_COLORS)
	source = (
	f"Intervalle CV ({band_label}) basé sur le modèle : {residual_model}"
	if residuals
	else "Intervalle CV indisponible (fallback ±3%)."
	)
	return table, source, plot

	# Callback of the "Simuler les transferts" button.
	# Returns (simulation table for the selected bureau, plot, opportunity
	# table over all bureaux). Every failure path returns empty tables.
	def _strategy_simulate(
	bv_label: str,
	target_label: str,
	inscrits_override: float \| None,
	bloc_cible_label: str,
	source_1: str,
	target_1: str,
	delta_1_val: float,
	source_2: str,
	target_2: str,
	delta_2_val: float,
	):
	empty_sim = pd.DataFrame(columns=SIM_OUTPUT_COLUMNS)
	empty_oppo = pd.DataFrame(columns=OPPORTUNITY_OUTPUT_COLUMNS)
	if not bv_label or not target_label:
	return empty_sim, None, empty_oppo
	code_bv = bureau_map.get(bv_label)
	if not code_bv:
	return empty_sim, None, empty_oppo
	target_type, target_year = _parse_target_label(target_label)
	details, _, _ = backend.predict_bureau_details(
	code_bv,
	target_type,
	target_year,
	inscrits_override,
	)
	if details is None:
	return empty_sim, None, empty_oppo

	# Keep only transfers with known source/target labels and a positive delta.
	transfers = []
	for src_label, dst_label, delta in [
	(source_1, target_1, delta_1_val),
	(source_2, target_2, delta_2_val),
	]:
	src_key = CATEGORY_LABEL_TO_KEY.get(src_label)
	dst_key = CATEGORY_LABEL_TO_KEY.get(dst_label)
	if src_key and dst_key and delta and delta > 0:
	transfers.append((src_key, dst_key, float(delta)))

	counts = details["counts"]
	totals = details["totals"]
	inscrits_total = int(totals.get("inscrits", 0))
	updated = apply_transfers(counts, inscrits_total, transfers)
	sim_table = build_simulation_table(counts, updated)
	sim_plot = build_bar_chart(
	sim_table,
	value_col="apres_transfert",
	ylabel="Nombre d'électeurs",
	color_map=DISPLAY_LABEL_COLORS,
	)

	# Opportunity table: re-run the prediction and the same transfers for
	# every bureau (one predict_bureau_details call per bureau).
	target_bloc = CATEGORY_LABEL_TO_KEY.get(bloc_cible_label, bloc_cible_label)
	opp_rows = []
	if target_bloc in ordered_categories():
	for bv_code in backend.available_bureaux():
	# The registered-voter override only applies to the selected bureau.
	override = inscrits_override if bv_code == code_bv else None
	bv_details, _, _ = backend.predict_bureau_details(
	bv_code,
	target_type,
	target_year,
	override,
	)
	if bv_details is None:
	continue
	base_counts = bv_details["counts"]
	bv_totals = bv_details["totals"]
	bv_inscrits = int(bv_totals.get("inscrits", 0))
	updated_counts = apply_transfers(base_counts, bv_inscrits, transfers)
	# Leading bloc before/after, among the ordered categories only
	# (blank, spoiled and abstention counts are excluded).
	bloc_counts = {cat: int(base_counts.get(cat, 0)) for cat in ordered_categories()}
	updated_blocs = {cat: int(updated_counts.get(cat, 0)) for cat in ordered_categories()}
	top_base = max(bloc_counts, key=bloc_counts.get) if bloc_counts else None
	top_after = max(updated_blocs, key=updated_blocs.get) if updated_blocs else None
	gain = int(updated_counts.get(target_bloc, 0) - base_counts.get(target_bloc, 0))
	opp_rows.append(
	{
	"bureau": bureau_label_by_code.get(bv_code, bv_code),
	"gain_cible": gain,
	"score_base": int(base_counts.get(target_bloc, 0)),
	"score_apres": int(updated_counts.get(target_bloc, 0)),
	"top_base": DISPLAY_CATEGORY_LABELS.get(top_base, top_base),
	"top_apres": DISPLAY_CATEGORY_LABELS.get(top_after, top_after),
	# "oui" only when the target bloc takes the lead because of the transfers.
	"bascule": "oui" if top_base != target_bloc and top_after == target_bloc else "non",
	}
	)
	opp_df = pd.DataFrame(opp_rows, columns=OPPORTUNITY_OUTPUT_COLUMNS)
	if not opp_df.empty:
	# Descending string sort puts "oui" before "non", then the largest gains first.
	opp_df = opp_df.sort_values(["bascule", "gain_cible"], ascending=[False, False])
	return sim_table, sim_plot, opp_df

	# Event wiring. The order of predict_inputs must match the positional
	# parameters of _predict: six fixed inputs, then the category overrides
	# in ordered_categories() order.
	predict_inputs = [bureau_dd, target_dd, inscrits_in, abstention_in, blancs_in, nuls_in]
	predict_inputs += [override_inputs[cat] for cat in ordered_categories() if cat in override_inputs]
	predict_btn.click(_predict, inputs=predict_inputs, outputs=[output_df, source_box, chart])
	history_btn.click(
	_history,
	inputs=[history_bureau_dd, history_election_dd],
	outputs=[history_df, history_source, history_chart, history_meta],
	)
	map_btn.click(
	_map,
	inputs=[map_target_dd],
	outputs=[map_html],
	)
	strategy_btn.click(
	_strategy_interval,
	inputs=[strategy_bureau_dd, strategy_target_dd, strategy_inscrits_in, interval_dd],
	outputs=[interval_df, interval_source, interval_chart],
	)
	# The simulator reuses the bureau, target and registered-voter inputs of
	# the interval analysis above.
	simulate_btn.click(
	_strategy_simulate,
	inputs=[
	strategy_bureau_dd,
	strategy_target_dd,
	strategy_inscrits_in,
	target_bloc_dd,
	source_1_dd,
	target_1_dd,
	delta_1,
	source_2_dd,
	target_2_dd,
	delta_2,
	],
	outputs=[sim_df, sim_chart, opportunity_df],
	)
	return demo


	# Script entry point: configure logging, build the interface and serve it.
	if __name__ == "__main__":
	logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
	demo = create_interface()
	# server_name "0.0.0.0" makes the server listen on all network interfaces
	# (not only localhost); the port is fixed to 7860.
	demo.launch(server_name="0.0.0.0", server_port=7860)