File size: 4,151 Bytes
46f9144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
from __future__ import annotations

import argparse
import shlex
import subprocess
import sys
from pathlib import Path


# Absolute directory containing this script; intermediate data, models,
# reports and predictions paths are built from it.
PROJECT_ROOT = Path(__file__).resolve().parent
# Interpreter running this script, reused to launch every pipeline step.
PYTHON = sys.executable


def run_step(cmd: list[str], desc: str) -> None:
    """Run one pipeline step as a child process and abort on failure.

    Args:
        cmd: Argument vector handed to ``subprocess.run`` (no shell involved).
        desc: Human-readable step label, printed as a banner and included in
            the failure message.

    Raises:
        SystemExit: If the child exits with a non-zero return code. The
            message carries the step label, the return code and the command.
    """
    # Flush the banner before spawning: the child writes straight to the
    # inherited stdout, so a still-buffered banner would otherwise show up
    # after the step's own output when stdout is a pipe or a file.
    print(f"\n=== {desc} ===", flush=True)
    result = subprocess.run(cmd, check=False)
    if result.returncode != 0:
        # shlex.join quotes arguments containing spaces or shell
        # metacharacters, so the reported command is unambiguous and can be
        # pasted back into a shell.
        raise SystemExit(
            f"Echec de l'étape '{desc}' (code {result.returncode}). Commande: {shlex.join(cmd)}"
        )


def main() -> None:
    """Parse the command-line options and run the enabled pipeline stages.

    The stages are launched through ``run_step`` in a fixed order
    (preprocess, features, train, predict). Each stage has a matching
    ``--skip-*`` flag that disables it. A closing hint is printed once every
    enabled stage has returned.
    """
    cli = argparse.ArgumentParser(
        description="Pipeline orchestration: preprocess -> features -> train -> predict",
    )
    cli.add_argument("--raw-dir", type=Path, default=Path("data/raw"), help="Répertoire des fichiers bruts.")
    cli.add_argument("--mapping", type=Path, default=Path("config/nuances.yaml"), help="Mapping nuances->catégories.")
    cli.add_argument("--target-election", type=str, default="municipales", help="Election cible (ex: municipales).")
    cli.add_argument("--target-year", type=int, default=2026, help="Année cible.")
    cli.add_argument("--commune-code", type=str, default="301", help="Code commune pour la prédiction (Sète=301).")
    # The four opt-out switches share the same shape; register them in order.
    for flag, help_text in (
        ("--skip-preprocess", "Ne pas relancer le prétraitement."),
        ("--skip-features", "Ne pas reconstruire le panel."),
        ("--skip-train", "Ne pas réentraîner le modèle."),
        ("--skip-predict", "Ne pas générer les prédictions CSV."),
    ):
        cli.add_argument(flag, action="store_true", help=help_text)
    opts = cli.parse_args()

    # Directories shared by several stages, all anchored at the project root.
    interim_dir = PROJECT_ROOT / "data" / "interim"
    processed_dir = PROJECT_ROOT / "data" / "processed"
    models_dir = PROJECT_ROOT / "models"

    # Artefacts handed from one stage to the next.
    elections_long = str(interim_dir / "elections_long.parquet")
    panel = str(processed_dir / "panel.parquet")
    mapping = str(opts.mapping)

    def module_cmd(module: str, *module_args: str) -> list[str]:
        """Build the argv that runs *module* with the current interpreter."""
        return [PYTHON, "-m", module, *module_args]

    # (skip flag, banner label, command) for each stage, in execution order.
    stages = (
        (
            opts.skip_preprocess,
            "Prétraitement (format long)",
            module_cmd(
                "src.data.preprocess",
                "--raw-dir", str(opts.raw_dir),
                "--output-dir", str(interim_dir),
            ),
        ),
        (
            opts.skip_features,
            "Construction du panel features+cibles",
            module_cmd(
                "src.features.build_features",
                "--elections-long", elections_long,
                "--mapping", mapping,
                "--output", panel,
                "--output-csv", str(processed_dir / "panel.csv"),
            ),
        ),
        (
            opts.skip_train,
            "Entraînement / évaluation des modèles",
            module_cmd(
                "src.model.train",
                "--panel", panel,
                "--reports-dir", str(PROJECT_ROOT / "reports"),
                "--models-dir", str(models_dir),
            ),
        ),
        (
            opts.skip_predict,
            "Génération des prédictions CSV",
            module_cmd(
                "src.model.predict",
                "--model-path", str(models_dir / "hist_gradient_boosting.joblib"),
                "--feature-columns", str(models_dir / "feature_columns.json"),
                "--elections-long", elections_long,
                "--mapping", mapping,
                "--target-election-type", opts.target_election,
                "--target-year", str(opts.target_year),
                "--commune-code", opts.commune_code,
                "--output-dir", str(PROJECT_ROOT / "predictions"),
            ),
        ),
    )

    for skipped, label, command in stages:
        if skipped:
            continue
        run_step(command, label)

    print("\nPipeline terminé. Lance Gradio avec `python -m app.gradio_app`.")


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()