File size: 4,151 Bytes
46f9144 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
from __future__ import annotations
import argparse
import shlex
import subprocess
import sys
from pathlib import Path
# Absolute directory containing this script; main() builds the interim,
# processed, models, reports and predictions paths from it.
PROJECT_ROOT: Path = Path(__file__).resolve().parents[0]

# Interpreter currently running this script; reused to launch each pipeline
# step so that the children run under the same Python.
PYTHON: str = sys.executable
def run_step(cmd: list[str], desc: str) -> None:
    """Run one pipeline step as a subprocess and abort if it fails.

    Args:
        cmd: Command and its arguments, handed to ``subprocess.run`` as a
            list (no shell is involved).
        desc: Human-readable step name, shown in the banner and in the
            failure message.

    Raises:
        SystemExit: If the command exits with a non-zero return code. The
            message carries the step name, the return code and the command.
    """
    # Flush so the banner reaches stdout before the child's own output, even
    # when stdout is block-buffered (redirected to a file or a pipe).
    print(f"\n=== {desc} ===", flush=True)
    result = subprocess.run(cmd, check=False)
    if result.returncode != 0:
        # shlex.join quotes arguments containing spaces or shell
        # metacharacters, so the reported command is unambiguous and can be
        # pasted back into a shell to reproduce the failure.
        raise SystemExit(
            f"Echec de l'étape '{desc}' (code {result.returncode}). "
            f"Commande: {shlex.join(cmd)}"
        )
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the pipeline orchestrator."""
    parser = argparse.ArgumentParser(
        description="Pipeline orchestration: preprocess -> features -> train -> predict",
    )
    parser.add_argument("--raw-dir", type=Path, default=Path("data/raw"), help="Répertoire des fichiers bruts.")
    parser.add_argument("--mapping", type=Path, default=Path("config/nuances.yaml"), help="Mapping nuances->catégories.")
    parser.add_argument("--target-election", type=str, default="municipales", help="Election cible (ex: municipales).")
    parser.add_argument("--target-year", type=int, default=2026, help="Année cible.")
    parser.add_argument("--commune-code", type=str, default="301", help="Code commune pour la prédiction (Sète=301).")
    parser.add_argument("--skip-preprocess", action="store_true", help="Ne pas relancer le prétraitement.")
    parser.add_argument("--skip-features", action="store_true", help="Ne pas reconstruire le panel.")
    parser.add_argument("--skip-train", action="store_true", help="Ne pas réentraîner le modèle.")
    parser.add_argument("--skip-predict", action="store_true", help="Ne pas générer les prédictions CSV.")
    return parser


def main(argv: list[str] | None = None) -> None:
    """Run the pipeline: preprocess -> features -> train -> predict.

    Each stage is launched through ``run_step`` as ``PYTHON -m <module>`` and
    can be disabled with its ``--skip-*`` flag. Stages run in a fixed order
    and the first failing stage stops the pipeline (``run_step`` raises
    ``SystemExit``).

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what the
            original zero-argument ``main()`` did.

    Raises:
        SystemExit: On invalid arguments (raised by argparse) or when a
            stage exits with a non-zero status (raised by ``run_step``).
    """
    args = _build_parser().parse_args(argv)

    # Output locations are anchored at PROJECT_ROOT. The --raw-dir and
    # --mapping values are forwarded as given, so relative values are
    # resolved by the child processes against the current directory.
    # NOTE(review): the stages are started with `-m src.…` and no explicit
    # working directory, so this presumably has to be launched from the
    # project root for the modules to be importable — confirm.
    interim_dir = PROJECT_ROOT / "data" / "interim"
    processed_dir = PROJECT_ROOT / "data" / "processed"
    models_dir = PROJECT_ROOT / "models"

    interim_path = interim_dir / "elections_long.parquet"
    panel_path = processed_dir / "panel.parquet"
    # NOTE(review): assumes the training stage writes this exact file name —
    # confirm against src.model.train.
    model_path = models_dir / "hist_gradient_boosting.joblib"

    # (skip flag, banner text, module name followed by its arguments)
    stages: list[tuple[bool, str, list[str]]] = [
        (
            args.skip_preprocess,
            "Prétraitement (format long)",
            [
                "src.data.preprocess",
                "--raw-dir",
                str(args.raw_dir),
                "--output-dir",
                str(interim_dir),
            ],
        ),
        (
            args.skip_features,
            "Construction du panel features+cibles",
            [
                "src.features.build_features",
                "--elections-long",
                str(interim_path),
                "--mapping",
                str(args.mapping),
                "--output",
                str(panel_path),
                "--output-csv",
                str(processed_dir / "panel.csv"),
            ],
        ),
        (
            args.skip_train,
            "Entraînement / évaluation des modèles",
            [
                "src.model.train",
                "--panel",
                str(panel_path),
                "--reports-dir",
                str(PROJECT_ROOT / "reports"),
                "--models-dir",
                str(models_dir),
            ],
        ),
        (
            args.skip_predict,
            "Génération des prédictions CSV",
            [
                "src.model.predict",
                "--model-path",
                str(model_path),
                "--feature-columns",
                str(models_dir / "feature_columns.json"),
                "--elections-long",
                str(interim_path),
                "--mapping",
                str(args.mapping),
                "--target-election-type",
                args.target_election,
                "--target-year",
                str(args.target_year),
                "--commune-code",
                args.commune_code,
                "--output-dir",
                str(PROJECT_ROOT / "predictions"),
            ],
        ),
    ]

    for skipped, desc, module_args in stages:
        if skipped:
            continue
        run_step([PYTHON, "-m", *module_args], desc)

    print("\nPipeline terminé. Lance Gradio avec `python -m app.gradio_app`.")
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|