stephmnt committed on
Commit
fa3d628
·
verified ·
1 Parent(s): 23b1977

Sync from GitHub via hub-sync

Browse files
artifacts/models/p1_historical_metadata.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "artifact_role": "P1_historical_prediction_model",
 
3
  "training_notebook": "notebooks/experience_1.ipynb",
4
  "training_script": "scripts/experience_1.py",
5
  "training_entrypoint": "scripts/experience_1.py",
@@ -12,7 +13,7 @@
12
  "parameter_grid_size": 3,
13
  "tuning_stage": "systematic_grid_search",
14
  "regularization_profile": "parameter_grid_search",
15
- "trained_at_utc": "2026-05-08T13:57:00.000704+00:00",
16
  "dataset_source": "/Users/steph/Code/Python/Jupyter/OCR_Projet12/artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv",
17
  "target_year": 2016,
18
  "target_column": "target_yield_t_ha_2016",
@@ -105,12 +106,44 @@
105
  "area_role": "group_only_not_feature",
106
  "split_strategy": "GroupShuffleSplit(area, test_size=0.2, random_state=42)",
107
  "metrics": {
108
- "test_rmse": 2.0592505240003227,
109
  "test_mae": 0.8025563824483579,
110
  "test_r2": 0.9468391265704531,
111
- "cv_val_rmse_mean": 1.5814357123886047,
112
- "cv_val_mae_mean": 0.6463164060176476,
113
  "cv_val_r2_mean": 0.9623090308612253
114
  },
115
- "mlflow_run_id": "89a0b166132d4e9f91b8b520eaa34ed3"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  }
 
1
  {
2
  "artifact_role": "P1_historical_prediction_model",
3
+ "runtime_model_role": "historical",
4
  "training_notebook": "notebooks/experience_1.ipynb",
5
  "training_script": "scripts/experience_1.py",
6
  "training_entrypoint": "scripts/experience_1.py",
 
13
  "parameter_grid_size": 3,
14
  "tuning_stage": "systematic_grid_search",
15
  "regularization_profile": "parameter_grid_search",
16
+ "trained_at_utc": "2026-05-10T22:36:57.656730+00:00",
17
  "dataset_source": "/Users/steph/Code/Python/Jupyter/OCR_Projet12/artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv",
18
  "target_year": 2016,
19
  "target_column": "target_yield_t_ha_2016",
 
106
  "area_role": "group_only_not_feature",
107
  "split_strategy": "GroupShuffleSplit(area, test_size=0.2, random_state=42)",
108
  "metrics": {
109
+ "test_rmse": 2.059250524000323,
110
  "test_mae": 0.8025563824483579,
111
  "test_r2": 0.9468391265704531,
112
+ "cv_val_rmse_mean": 1.581435712388605,
113
+ "cv_val_mae_mean": 0.6463164060176475,
114
  "cv_val_r2_mean": 0.9623090308612253
115
  },
116
+ "mlflow_run_id": "1b8857069dc941109703fbee6fb2b61a",
117
+ "registered_model_name": "p1_historical_pipeline",
118
+ "registered_model_version": "7",
119
+ "registered_model_run_id": "8dee2459e8b84ccba75596514fd5a70a",
120
+ "model_uri": "models:/p1_historical_pipeline/7",
121
+ "registry_source_run_id": "8dee2459e8b84ccba75596514fd5a70a",
122
+ "registered_model_stage": "None",
123
+ "registered_model_source": "models:/m-45ba375e4c5345adad84f2ea32d9df9f",
124
+ "tracking_uri": "sqlite:////Users/steph/Code/Python/Jupyter/OCR_Projet12/artifacts/mlflow.db",
125
+ "exported_at_utc": "2026-05-10T22:37:27.427985+00:00",
126
+ "artifact_path": "artifacts/models/p1_historical_pipeline.joblib",
127
+ "metadata_path": "artifacts/models/p1_historical_metadata.json",
128
+ "output_path": "artifacts/models/p1_historical_pipeline.joblib",
129
+ "output_metadata_path": "artifacts/models/p1_historical_metadata.json",
130
+ "role": "historical",
131
+ "source_run_name": "experience_1__runtime_historical",
132
+ "source_experiment_id": "4",
133
+ "source_run_metrics": {
134
+ "test_rmse": 2.059250524000323,
135
+ "test_mae": 0.8025563824483579,
136
+ "test_r2": 0.9468391265704531,
137
+ "cv_val_rmse_mean": 1.581435712388605,
138
+ "cv_val_mae_mean": 0.6463164060176475,
139
+ "cv_val_r2_mean": 0.9623090308612253
140
+ },
141
+ "source_run_params": {
142
+ "experience_name": "experience_1",
143
+ "runtime_model_role": "historical",
144
+ "registered_model_name": "p1_historical_pipeline",
145
+ "training_entrypoint": "scripts/experience_1.py",
146
+ "target_year": "2016",
147
+ "best_candidate_model_name": "random_forest_search_01"
148
+ }
149
  }
artifacts/models/p1_historical_pipeline.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edbb02f829fbb315e6c51ea7a64c856da2e3585fe6b50a9a3e40b671f41200f4
3
- size 3646243
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:807b6071dc5a98528ac45dc0a799af64bfa0ace91f3d855eb392adce5a529242
3
+ size 3646451
artifacts/models/p23_simulation_metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "model_name": "linear_regression",
3
- "trained_at_utc": "2026-05-08T13:57:01.647971+00:00",
4
  "dataset_source": "data/simulation/crop_yield.csv",
5
  "feature_columns": [
6
  "region",
@@ -22,5 +22,36 @@
22
  "test_r2": 0.9139501848982343
23
  },
24
  "strategy": "2_models_3_predictions_combined",
25
- "role": "local_adjustment_model_for_P2_and_P3"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
 
1
  {
2
  "model_name": "linear_regression",
3
+ "trained_at_utc": "2026-05-10T22:37:15.882903+00:00",
4
  "dataset_source": "data/simulation/crop_yield.csv",
5
  "feature_columns": [
6
  "region",
 
22
  "test_r2": 0.9139501848982343
23
  },
24
  "strategy": "2_models_3_predictions_combined",
25
+ "role": "simulation",
26
+ "runtime_model_role": "simulation",
27
+ "registered_model_name": "p23_simulation_pipeline",
28
+ "registered_model_version": "6",
29
+ "registered_model_run_id": "7a9fc5eba2a146058618994287c53538",
30
+ "model_uri": "models:/p23_simulation_pipeline/6",
31
+ "registered_model_stage": "None",
32
+ "registered_model_source": "models:/m-1d4b5fa94ca945809af904589800a72a",
33
+ "tracking_uri": "sqlite:////Users/steph/Code/Python/Jupyter/OCR_Projet12/artifacts/mlflow.db",
34
+ "exported_at_utc": "2026-05-10T22:37:27.484951+00:00",
35
+ "artifact_path": "artifacts/models/p23_simulation_pipeline.joblib",
36
+ "metadata_path": "artifacts/models/p23_simulation_metadata.json",
37
+ "output_path": "artifacts/models/p23_simulation_pipeline.joblib",
38
+ "output_metadata_path": "artifacts/models/p23_simulation_metadata.json",
39
+ "source_run_name": "simulation_runtime__runtime_model",
40
+ "source_experiment_id": "5",
41
+ "source_run_metrics": {
42
+ "train_rmse": 0.49987819477652967,
43
+ "train_mae": 0.39889442485099674,
44
+ "train_r2": 0.9130634145187704,
45
+ "test_rmse": 0.49668933266948173,
46
+ "test_mae": 0.39606497055687334,
47
+ "test_r2": 0.9139501848982343
48
+ },
49
+ "source_run_params": {
50
+ "runtime_model_role": "simulation",
51
+ "registered_model_name": "p23_simulation_pipeline",
52
+ "training_entrypoint": "scripts/train_simulation_model.py",
53
+ "model_name": "linear_regression",
54
+ "dataset_source": "data/simulation/crop_yield.csv",
55
+ "sample_size": "200000"
56
+ }
57
  }
artifacts/models/p23_simulation_pipeline.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:592f4e1bc4c3edca2960f59b5daea515a5bc845ce4117de327d1e8c0280509b7
3
- size 4820
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb2405e6848232fddcd2a15c44384decf4f0b3d98f2d5b0e948296f172c34ec7
3
+ size 4870
scripts/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (269 Bytes). View file
 
scripts/__pycache__/mlflow_config.cpython-312.pyc ADDED
Binary file (3.2 kB). View file
 
scripts/__pycache__/runtime_model_specs.cpython-312.pyc ADDED
Binary file (1.6 kB). View file
 
scripts/deployment_payload.py CHANGED
@@ -9,15 +9,23 @@ from __future__ import annotations
9
  import argparse
10
  from pathlib import Path
11
  import shutil
 
12
 
13
 
14
  PROJECT_ROOT = Path(__file__).resolve().parents[1]
 
 
 
 
 
 
 
15
 
16
  DEPLOYMENT_REQUIRED_ARTIFACTS = [
17
- Path("artifacts/models/p1_historical_pipeline.joblib"),
18
- Path("artifacts/models/p1_historical_metadata.json"),
19
- Path("artifacts/models/p23_simulation_pipeline.joblib"),
20
- Path("artifacts/models/p23_simulation_metadata.json"),
21
  Path("artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv"),
22
  ]
23
 
@@ -44,10 +52,22 @@ PAYLOAD_FILE_SPECS = [
44
  (Path("data/dataset_consolide.csv"), Path("data/dataset_consolide.csv")),
45
  (Path("data/simulation/crop_yield.csv"), Path("data/simulation/crop_yield.csv")),
46
  (Path("main.py"), Path("main.py")),
47
- (Path("artifacts/models/p1_historical_pipeline.joblib"), Path("artifacts/models/p1_historical_pipeline.joblib")),
48
- (Path("artifacts/models/p1_historical_metadata.json"), Path("artifacts/models/p1_historical_metadata.json")),
49
- (Path("artifacts/models/p23_simulation_pipeline.joblib"), Path("artifacts/models/p23_simulation_pipeline.joblib")),
50
- (Path("artifacts/models/p23_simulation_metadata.json"), Path("artifacts/models/p23_simulation_metadata.json")),
 
 
 
 
 
 
 
 
 
 
 
 
51
  (
52
  Path("artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv"),
53
  Path("artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv"),
 
9
  import argparse
10
  from pathlib import Path
11
  import shutil
12
+ import sys
13
 
14
 
15
  PROJECT_ROOT = Path(__file__).resolve().parents[1]
16
+ if str(PROJECT_ROOT) not in sys.path:
17
+ sys.path.insert(0, str(PROJECT_ROOT))
18
+
19
+ from scripts.runtime_model_specs import (
20
+ HISTORICAL_RUNTIME_MODEL_SPEC,
21
+ SIMULATION_RUNTIME_MODEL_SPEC,
22
+ )
23
 
24
  DEPLOYMENT_REQUIRED_ARTIFACTS = [
25
+ HISTORICAL_RUNTIME_MODEL_SPEC.output_model_path.relative_to(PROJECT_ROOT),
26
+ HISTORICAL_RUNTIME_MODEL_SPEC.output_metadata_path.relative_to(PROJECT_ROOT),
27
+ SIMULATION_RUNTIME_MODEL_SPEC.output_model_path.relative_to(PROJECT_ROOT),
28
+ SIMULATION_RUNTIME_MODEL_SPEC.output_metadata_path.relative_to(PROJECT_ROOT),
29
  Path("artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv"),
30
  ]
31
 
 
52
  (Path("data/dataset_consolide.csv"), Path("data/dataset_consolide.csv")),
53
  (Path("data/simulation/crop_yield.csv"), Path("data/simulation/crop_yield.csv")),
54
  (Path("main.py"), Path("main.py")),
55
+ (
56
+ HISTORICAL_RUNTIME_MODEL_SPEC.output_model_path.relative_to(PROJECT_ROOT),
57
+ HISTORICAL_RUNTIME_MODEL_SPEC.output_model_path.relative_to(PROJECT_ROOT),
58
+ ),
59
+ (
60
+ HISTORICAL_RUNTIME_MODEL_SPEC.output_metadata_path.relative_to(PROJECT_ROOT),
61
+ HISTORICAL_RUNTIME_MODEL_SPEC.output_metadata_path.relative_to(PROJECT_ROOT),
62
+ ),
63
+ (
64
+ SIMULATION_RUNTIME_MODEL_SPEC.output_model_path.relative_to(PROJECT_ROOT),
65
+ SIMULATION_RUNTIME_MODEL_SPEC.output_model_path.relative_to(PROJECT_ROOT),
66
+ ),
67
+ (
68
+ SIMULATION_RUNTIME_MODEL_SPEC.output_metadata_path.relative_to(PROJECT_ROOT),
69
+ SIMULATION_RUNTIME_MODEL_SPEC.output_metadata_path.relative_to(PROJECT_ROOT),
70
+ ),
71
  (
72
  Path("artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv"),
73
  Path("artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv"),
scripts/experience_1.py CHANGED
@@ -44,13 +44,20 @@ PROJECT_ROOT = Path(__file__).resolve().parents[1]
44
  if str(PROJECT_ROOT) not in sys.path:
45
  sys.path.insert(0, str(PROJECT_ROOT))
46
 
47
- from scripts.mlflow_logging import log_named_sklearn_model
 
 
 
 
 
 
48
  from scripts.project_config import DEFAULT_CONFIG_PATH, load_preparation_config
 
49
 
50
 
51
  SEED = 42
52
  CV_N_SPLITS = 4
53
- MLFLOW_EXPERIMENT_NAME = "experience_1"
54
  SEARCH_SPACE_DEFINITION = {
55
  "search_method": "parameter_grid",
56
  "scope": "all_candidate_families",
@@ -310,9 +317,9 @@ def build_experience_paths(
310
  cv_dir.mkdir(parents=True, exist_ok=True)
311
  models_dir.mkdir(parents=True, exist_ok=True)
312
 
313
- resolved_tracking_uri = tracking_uri or f"sqlite:///{(artifacts_dir / 'mlflow.db').resolve()}"
314
  mlflow_db_path = Path(resolved_tracking_uri.removeprefix("sqlite:///")).resolve()
315
- mlflow_artifacts_dir = artifacts_dir / "mlruns"
316
  mlflow_experiment_artifact_dir = mlflow_artifacts_dir / MLFLOW_EXPERIMENT_NAME
317
  mlflow_experiment_artifact_dir.mkdir(parents=True, exist_ok=True)
318
 
@@ -1246,6 +1253,7 @@ def export_p1_artifact(
1246
 
1247
  p1_metadata = {
1248
  "artifact_role": "P1_historical_prediction_model",
 
1249
  "training_notebook": "notebooks/experience_1.ipynb",
1250
  "training_script": "scripts/experience_1.py",
1251
  "training_entrypoint": "scripts/experience_1.py",
@@ -1277,6 +1285,41 @@ def export_p1_artifact(
1277
  "mlflow_run_id": str(results_df.loc[0, "run_id"]) if "run_id" in results_df.columns else None,
1278
  }
1279
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1280
  joblib.dump(p1_pipeline, paths.p1_model_path)
1281
  paths.p1_metadata_path.write_text(
1282
  json.dumps(p1_metadata, indent=2, ensure_ascii=True),
@@ -1305,7 +1348,7 @@ def run_experience_1(
1305
  """
1306
  resolved_config_path = Path(config_path) if config_path is not None else DEFAULT_CONFIG_PATH
1307
  config = load_preparation_config(resolved_config_path, ensure_dirs=True)
1308
- resolved_tracking_uri = tracking_uri or f"sqlite:///{(Path(config['ARTIFACTS_DIR']) / 'mlflow.db').resolve()}"
1309
  paths = build_experience_paths(
1310
  artifacts_dir=Path(config["ARTIFACTS_DIR"]),
1311
  tracking_uri=resolved_tracking_uri,
@@ -1362,6 +1405,10 @@ def run_experience_1(
1362
  "best_test_rmse": float(results_df.loc[0, "test_rmse"]),
1363
  "best_test_r2": float(results_df.loc[0, "test_r2"]),
1364
  "tracked_models": list(results_df["model"]),
 
 
 
 
1365
  "p1_metadata": p1_metadata,
1366
  }
1367
 
 
44
  if str(PROJECT_ROOT) not in sys.path:
45
  sys.path.insert(0, str(PROJECT_ROOT))
46
 
47
+ from scripts.mlflow_logging import log_and_register_sklearn_model, log_named_sklearn_model
48
+ from scripts.mlflow_config import (
49
+ DEFAULT_MLFLOW_TRACKING_URI,
50
+ EXPERIENCE_1_EXPERIMENT_NAME,
51
+ mlflow_artifacts_dir_for_tracking_uri,
52
+ normalize_tracking_uri,
53
+ )
54
  from scripts.project_config import DEFAULT_CONFIG_PATH, load_preparation_config
55
+ from scripts.runtime_model_specs import HISTORICAL_RUNTIME_MODEL_SPEC
56
 
57
 
58
  SEED = 42
59
  CV_N_SPLITS = 4
60
+ MLFLOW_EXPERIMENT_NAME = EXPERIENCE_1_EXPERIMENT_NAME
61
  SEARCH_SPACE_DEFINITION = {
62
  "search_method": "parameter_grid",
63
  "scope": "all_candidate_families",
 
317
  cv_dir.mkdir(parents=True, exist_ok=True)
318
  models_dir.mkdir(parents=True, exist_ok=True)
319
 
320
+ resolved_tracking_uri = normalize_tracking_uri(tracking_uri or DEFAULT_MLFLOW_TRACKING_URI)
321
  mlflow_db_path = Path(resolved_tracking_uri.removeprefix("sqlite:///")).resolve()
322
+ mlflow_artifacts_dir = mlflow_artifacts_dir_for_tracking_uri(resolved_tracking_uri)
323
  mlflow_experiment_artifact_dir = mlflow_artifacts_dir / MLFLOW_EXPERIMENT_NAME
324
  mlflow_experiment_artifact_dir.mkdir(parents=True, exist_ok=True)
325
 
 
1253
 
1254
  p1_metadata = {
1255
  "artifact_role": "P1_historical_prediction_model",
1256
+ "runtime_model_role": HISTORICAL_RUNTIME_MODEL_SPEC.role,
1257
  "training_notebook": "notebooks/experience_1.ipynb",
1258
  "training_script": "scripts/experience_1.py",
1259
  "training_entrypoint": "scripts/experience_1.py",
 
1285
  "mlflow_run_id": str(results_df.loc[0, "run_id"]) if "run_id" in results_df.columns else None,
1286
  }
1287
 
1288
+ with mlflow.start_run(run_name=f"{MLFLOW_EXPERIMENT_NAME}__runtime_historical") as runtime_run:
1289
+ mlflow.log_param("experience_name", MLFLOW_EXPERIMENT_NAME)
1290
+ mlflow.log_param("runtime_model_role", HISTORICAL_RUNTIME_MODEL_SPEC.role)
1291
+ mlflow.log_param("registered_model_name", HISTORICAL_RUNTIME_MODEL_SPEC.registered_model_name)
1292
+ mlflow.log_param("training_entrypoint", "scripts/experience_1.py")
1293
+ mlflow.log_param("target_year", context.target_year)
1294
+ mlflow.log_param("best_candidate_model_name", best_model_name)
1295
+ mlflow.log_metric("test_rmse", p1_metadata["metrics"]["test_rmse"])
1296
+ mlflow.log_metric("test_mae", p1_metadata["metrics"]["test_mae"])
1297
+ mlflow.log_metric("test_r2", p1_metadata["metrics"]["test_r2"])
1298
+ mlflow.log_metric("cv_val_rmse_mean", p1_metadata["metrics"]["cv_val_rmse_mean"])
1299
+ mlflow.log_metric("cv_val_mae_mean", p1_metadata["metrics"]["cv_val_mae_mean"])
1300
+ mlflow.log_metric("cv_val_r2_mean", p1_metadata["metrics"]["cv_val_r2_mean"])
1301
+ mlflow.log_artifact(str(paths.dataset_path))
1302
+ mlflow.log_artifact(str(paths.model_results_path))
1303
+ runtime_registration = log_and_register_sklearn_model(
1304
+ p1_pipeline,
1305
+ artifact_name=HISTORICAL_RUNTIME_MODEL_SPEC.registered_model_name,
1306
+ registered_model_name=HISTORICAL_RUNTIME_MODEL_SPEC.registered_model_name,
1307
+ model_metadata={
1308
+ "runtime_model_role": HISTORICAL_RUNTIME_MODEL_SPEC.role,
1309
+ "training_entrypoint": "scripts/experience_1.py",
1310
+ },
1311
+ )
1312
+
1313
+ p1_metadata.update(
1314
+ {
1315
+ "registered_model_name": runtime_registration["registered_model_name"],
1316
+ "registered_model_version": runtime_registration["registered_model_version"],
1317
+ "registered_model_run_id": runtime_registration["run_id"],
1318
+ "model_uri": runtime_registration["model_uri"],
1319
+ "registry_source_run_id": runtime_run.info.run_id,
1320
+ }
1321
+ )
1322
+
1323
  joblib.dump(p1_pipeline, paths.p1_model_path)
1324
  paths.p1_metadata_path.write_text(
1325
  json.dumps(p1_metadata, indent=2, ensure_ascii=True),
 
1348
  """
1349
  resolved_config_path = Path(config_path) if config_path is not None else DEFAULT_CONFIG_PATH
1350
  config = load_preparation_config(resolved_config_path, ensure_dirs=True)
1351
+ resolved_tracking_uri = normalize_tracking_uri(tracking_uri or DEFAULT_MLFLOW_TRACKING_URI)
1352
  paths = build_experience_paths(
1353
  artifacts_dir=Path(config["ARTIFACTS_DIR"]),
1354
  tracking_uri=resolved_tracking_uri,
 
1405
  "best_test_rmse": float(results_df.loc[0, "test_rmse"]),
1406
  "best_test_r2": float(results_df.loc[0, "test_r2"]),
1407
  "tracked_models": list(results_df["model"]),
1408
+ "registered_model_name": p1_metadata.get("registered_model_name"),
1409
+ "registered_model_version": p1_metadata.get("registered_model_version"),
1410
+ "registered_model_run_id": p1_metadata.get("registered_model_run_id"),
1411
+ "model_uri": p1_metadata.get("model_uri"),
1412
  "p1_metadata": p1_metadata,
1413
  }
1414
 
scripts/mlflow_config.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration MLflow commune aux scripts et a l'interface locale."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+
8
+ PROJECT_ROOT = Path(__file__).resolve().parents[1]
9
+ MLFLOW_DB_PATH = (PROJECT_ROOT / "artifacts" / "mlflow.db").resolve()
10
+ MLFLOW_ARTIFACTS_DIR = (PROJECT_ROOT / "artifacts" / "mlruns").resolve()
11
+ DEFAULT_MLFLOW_TRACKING_URI = f"sqlite:///{MLFLOW_DB_PATH}"
12
+
13
+ EXPERIENCE_1_EXPERIMENT_NAME = "experience_1"
14
+ SIMULATION_RUNTIME_EXPERIMENT_NAME = "simulation_runtime"
15
+ FULL_PIPELINE_EXPERIMENT_NAME = "run_full_pipeline"
16
+
17
+
18
+ def normalize_tracking_uri(tracking_uri: str | None = None) -> str:
19
+ """Retourne un tracking URI MLflow stable depuis la racine du projet."""
20
+ resolved_uri = tracking_uri or DEFAULT_MLFLOW_TRACKING_URI
21
+ if not resolved_uri.startswith("sqlite:///"):
22
+ return resolved_uri
23
+
24
+ db_path = Path(resolved_uri.removeprefix("sqlite:///"))
25
+ if not db_path.is_absolute():
26
+ db_path = (PROJECT_ROOT / db_path).resolve()
27
+ db_path.parent.mkdir(parents=True, exist_ok=True)
28
+ return f"sqlite:///{db_path}"
29
+
30
+
31
+ def ensure_mlflow_directories() -> None:
32
+ """Cree les dossiers MLflow attendus par le projet."""
33
+ MLFLOW_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
34
+ MLFLOW_ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)
35
+
36
+
37
+ def mlflow_artifacts_dir_for_tracking_uri(tracking_uri: str | None = None) -> Path:
38
+ """Retourne la racine d'artefacts adaptee au tracking URI fourni."""
39
+ resolved_uri = normalize_tracking_uri(tracking_uri)
40
+ if resolved_uri == DEFAULT_MLFLOW_TRACKING_URI:
41
+ artifact_root = MLFLOW_ARTIFACTS_DIR
42
+ elif resolved_uri.startswith("sqlite:///"):
43
+ artifact_root = Path(resolved_uri.removeprefix("sqlite:///")).resolve().parent / "mlruns"
44
+ else:
45
+ artifact_root = MLFLOW_ARTIFACTS_DIR
46
+
47
+ artifact_root.mkdir(parents=True, exist_ok=True)
48
+ return artifact_root
49
+
50
+
51
+ def experiment_artifact_location(experiment_name: str, tracking_uri: str | None = None) -> str:
52
+ """Retourne l'emplacement d'artefacts standard d'une experience MLflow."""
53
+ artifact_dir = mlflow_artifacts_dir_for_tracking_uri(tracking_uri) / experiment_name
54
+ artifact_dir.mkdir(parents=True, exist_ok=True)
55
+ return artifact_dir.resolve().as_uri()
scripts/mlflow_logging.py CHANGED
@@ -3,6 +3,7 @@
3
  from __future__ import annotations
4
 
5
  import json
 
6
  from pathlib import Path
7
  from typing import Any
8
 
@@ -10,6 +11,30 @@ import mlflow
10
  import mlflow.pyfunc
11
  import mlflow.sklearn
12
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
 
15
  def sanitize_logged_model_name(raw_name: str) -> str:
@@ -53,6 +78,89 @@ def log_named_sklearn_model(estimator: Any, *, model_name: str) -> str:
53
  return logged_model_name
54
 
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  class EvaluationPredictionLookupModel(mlflow.pyfunc.PythonModel):
57
  """MLflow pyfunc model exposing precomputed evaluation predictions by key lookup.
58
 
 
3
  from __future__ import annotations
4
 
5
  import json
6
+ import logging
7
  from pathlib import Path
8
  from typing import Any
9
 
 
11
  import mlflow.pyfunc
12
  import mlflow.sklearn
13
  import pandas as pd
14
+ from mlflow.tracking import MlflowClient
15
+
16
+
17
+ SKLEARN_PICKLE_WARNING_PREFIX = (
18
+ "Saving scikit-learn models in the pickle or cloudpickle format requires exercising caution"
19
+ )
20
+
21
+
22
+ class _SuppressSklearnPickleWarning(logging.Filter):
23
+ """Filtre le warning MLflow repete sur la serialisation pickle/cloudpickle."""
24
+
25
+ def filter(self, record: logging.LogRecord) -> bool:
26
+ """Retourne `False` uniquement pour le warning verbeux attendu."""
27
+ return SKLEARN_PICKLE_WARNING_PREFIX not in record.getMessage()
28
+
29
+
30
+ def configure_mlflow_sklearn_logging() -> None:
31
+ """Rend les logs MLflow sklearn lisibles pendant les entrainements longs."""
32
+ logger = logging.getLogger("mlflow.sklearn")
33
+ if not any(isinstance(item, _SuppressSklearnPickleWarning) for item in logger.filters):
34
+ logger.addFilter(_SuppressSklearnPickleWarning())
35
+
36
+
37
+ configure_mlflow_sklearn_logging()
38
 
39
 
40
  def sanitize_logged_model_name(raw_name: str) -> str:
 
78
  return logged_model_name
79
 
80
 
81
+ def _registered_model_version_sort_key(version: Any) -> tuple[int, str]:
82
+ """Produit une cle de tri robuste pour les versions du registry MLflow."""
83
+ raw_version = str(getattr(version, "version", version))
84
+ return (int(raw_version), raw_version) if raw_version.isdigit() else (-1, raw_version)
85
+
86
+
87
+ def resolve_registered_model_version_for_run(
88
+ *,
89
+ registered_model_name: str,
90
+ run_id: str,
91
+ tracking_uri: str | None = None,
92
+ ) -> Any:
93
+ """Recupere la version du registry associee a un run MLflow donne.
94
+
95
+ Args:
96
+ registered_model_name: Nom du registered model a inspecter.
97
+ run_id: Identifiant du run source.
98
+ tracking_uri: Tracking URI MLflow optionnel.
99
+
100
+ Returns:
101
+ Any: Objet version retourne par le client MLflow.
102
+ """
103
+ client = MlflowClient(tracking_uri=tracking_uri)
104
+ versions = [
105
+ version
106
+ for version in client.search_model_versions(f"name = '{registered_model_name}'")
107
+ if str(getattr(version, "run_id", "")) == str(run_id)
108
+ ]
109
+ if not versions:
110
+ raise RuntimeError(
111
+ "Registered model version could not be resolved for "
112
+ f"model={registered_model_name!r} and run_id={run_id!r}."
113
+ )
114
+ return max(versions, key=_registered_model_version_sort_key)
115
+
116
+
117
+ def log_and_register_sklearn_model(
118
+ estimator: Any,
119
+ *,
120
+ artifact_name: str,
121
+ registered_model_name: str,
122
+ model_metadata: dict[str, Any] | None = None,
123
+ await_registration_for: int = 300,
124
+ ) -> dict[str, str]:
125
+ """Journalise un estimateur et l'enregistre comme registered model MLflow.
126
+
127
+ Args:
128
+ estimator: Estimateur scikit-learn a enregistrer.
129
+ artifact_name: Nom de l'artefact de run.
130
+ registered_model_name: Nom du registered model cible.
131
+ model_metadata: Metadonnees MLflow optionnelles.
132
+ await_registration_for: Duree d'attente maximale de l'enregistrement.
133
+
134
+ Returns:
135
+ dict[str, str]: Contexte de registry resolu apres l'enregistrement.
136
+ """
137
+ active_run = mlflow.active_run()
138
+ if active_run is None:
139
+ raise RuntimeError("An active MLflow run is required before registering a model.")
140
+
141
+ logged_model_name = sanitize_logged_model_name(artifact_name)
142
+ model_info = mlflow.sklearn.log_model(
143
+ estimator,
144
+ name=logged_model_name,
145
+ registered_model_name=registered_model_name,
146
+ metadata=model_metadata,
147
+ await_registration_for=await_registration_for,
148
+ )
149
+ resolved_version = resolve_registered_model_version_for_run(
150
+ registered_model_name=registered_model_name,
151
+ run_id=active_run.info.run_id,
152
+ tracking_uri=mlflow.get_tracking_uri(),
153
+ )
154
+ return {
155
+ "logged_model_name": logged_model_name,
156
+ "registered_model_name": registered_model_name,
157
+ "registered_model_version": str(resolved_version.version),
158
+ "model_uri": f"models:/{registered_model_name}/{resolved_version.version}",
159
+ "run_id": active_run.info.run_id,
160
+ "logged_model_uri": str(getattr(model_info, "model_uri", "")),
161
+ }
162
+
163
+
164
  class EvaluationPredictionLookupModel(mlflow.pyfunc.PythonModel):
165
  """MLflow pyfunc model exposing precomputed evaluation predictions by key lookup.
166
 
scripts/prediction_adjustment.py CHANGED
@@ -24,15 +24,19 @@ from sklearn.model_selection import train_test_split
24
  from sklearn.pipeline import Pipeline
25
  from sklearn.preprocessing import OneHotEncoder
26
 
 
 
 
 
27
  from scripts.simulation_dataset import load_normalized_simulation_dataset
28
 
29
  PROJECT_ROOT = Path(__file__).resolve().parents[1]
30
  HISTORICAL_WIDE_DATASET_PATH = PROJECT_ROOT / "artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv"
31
- HISTORICAL_MODEL_PATH = PROJECT_ROOT / "artifacts/models/p1_historical_pipeline.joblib"
32
- HISTORICAL_METADATA_PATH = PROJECT_ROOT / "artifacts/models/p1_historical_metadata.json"
33
  SIMULATION_DATASET_PATH = PROJECT_ROOT / "data/simulation/crop_yield.csv"
34
- SIMULATION_MODEL_PATH = PROJECT_ROOT / "artifacts/models/p23_simulation_pipeline.joblib"
35
- SIMULATION_METADATA_PATH = PROJECT_ROOT / "artifacts/models/p23_simulation_metadata.json"
36
 
37
  SEED = 42
38
  SIMULATION_SAMPLE_SIZE = 200_000
 
24
  from sklearn.pipeline import Pipeline
25
  from sklearn.preprocessing import OneHotEncoder
26
 
27
+ from scripts.runtime_model_specs import (
28
+ HISTORICAL_RUNTIME_MODEL_SPEC,
29
+ SIMULATION_RUNTIME_MODEL_SPEC,
30
+ )
31
  from scripts.simulation_dataset import load_normalized_simulation_dataset
32
 
33
  PROJECT_ROOT = Path(__file__).resolve().parents[1]
34
  HISTORICAL_WIDE_DATASET_PATH = PROJECT_ROOT / "artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv"
35
+ HISTORICAL_MODEL_PATH = HISTORICAL_RUNTIME_MODEL_SPEC.output_model_path
36
+ HISTORICAL_METADATA_PATH = HISTORICAL_RUNTIME_MODEL_SPEC.output_metadata_path
37
  SIMULATION_DATASET_PATH = PROJECT_ROOT / "data/simulation/crop_yield.csv"
38
+ SIMULATION_MODEL_PATH = SIMULATION_RUNTIME_MODEL_SPEC.output_model_path
39
+ SIMULATION_METADATA_PATH = SIMULATION_RUNTIME_MODEL_SPEC.output_metadata_path
40
 
41
  SEED = 42
42
  SIMULATION_SAMPLE_SIZE = 200_000
scripts/project_config.py CHANGED
@@ -1,4 +1,4 @@
1
- """Centralise le chargement de la configuration de preparation du projet."""
2
 
3
  from pathlib import Path
4
 
@@ -10,7 +10,7 @@ DEFAULT_CONFIG_PATH = PROJECT_ROOT / "config" / "project_paths.yaml"
10
 
11
 
12
  def _resolve_path(raw_path: str) -> Path:
13
- """Resout un chemin de configuration relativement a la racine du projet."""
14
  path = Path(raw_path)
15
  if path.is_absolute():
16
  return path
@@ -18,13 +18,13 @@ def _resolve_path(raw_path: str) -> Path:
18
 
19
 
20
  def ensure_preparation_directories(config: dict[str, object]) -> dict[str, object]:
21
- """Cree les dossiers cibles attendus par la preparation.
22
 
23
  Args:
24
  config: Configuration chargee depuis `project_paths.yaml`.
25
 
26
  Returns:
27
- dict[str, object]: Configuration inchangee, pour permettre le chainage.
28
  """
29
  artifacts_dir = config["ARTIFACTS_DIR"]
30
  pca_artifacts_dir = config["PCA_ARTIFACTS_DIR"]
@@ -45,14 +45,14 @@ def load_preparation_config(
45
  *,
46
  ensure_dirs: bool = False,
47
  ) -> dict[str, object]:
48
- """Charge la configuration de preparation depuis le fichier YAML du projet.
49
 
50
  Args:
51
  config_path: Chemin optionnel vers un fichier YAML de configuration.
52
- ensure_dirs: Cree les dossiers cibles si `True`.
53
 
54
  Returns:
55
- dict[str, object]: Configuration normalisee avec des `Path` resolus.
56
  """
57
  path = config_path or DEFAULT_CONFIG_PATH
58
  raw_config = yaml.safe_load(path.read_text())
 
1
+ """Centralise le chargement de la configuration de préparation du projet."""
2
 
3
  from pathlib import Path
4
 
 
10
 
11
 
12
  def _resolve_path(raw_path: str) -> Path:
13
+ """Résout un chemin de configuration relativement à la racine du projet."""
14
  path = Path(raw_path)
15
  if path.is_absolute():
16
  return path
 
18
 
19
 
20
  def ensure_preparation_directories(config: dict[str, object]) -> dict[str, object]:
21
+ """Crée les dossiers cibles attendus par la préparation.
22
 
23
  Args:
24
  config: Configuration chargee depuis `project_paths.yaml`.
25
 
26
  Returns:
27
+ dict[str, object]: Configuration inchangée, pour permettre le chainage.
28
  """
29
  artifacts_dir = config["ARTIFACTS_DIR"]
30
  pca_artifacts_dir = config["PCA_ARTIFACTS_DIR"]
 
45
  *,
46
  ensure_dirs: bool = False,
47
  ) -> dict[str, object]:
48
+ """Charge la configuration de préparation depuis le fichier YAML du projet.
49
 
50
  Args:
51
  config_path: Chemin optionnel vers un fichier YAML de configuration.
52
+ ensure_dirs: Crée les dossiers cibles si `True`.
53
 
54
  Returns:
55
+ dict[str, object]: Configuration normalisée avec des `Path` résolus.
56
  """
57
  path = config_path or DEFAULT_CONFIG_PATH
58
  raw_config = yaml.safe_load(path.read_text())
scripts/promote_registered_model.py CHANGED
@@ -1,12 +1,9 @@
1
- """Exporte un registered model MLflow vers un artefact local versionne.
2
-
3
- Le script applique volontairement une selection stricte du modele source pour
4
- eviter les exports ambigus quand le registre MLflow contient plusieurs modeles.
5
- """
6
 
7
  from __future__ import annotations
8
 
9
  import argparse
 
10
  from datetime import datetime, timezone
11
  import json
12
  from pathlib import Path
@@ -17,45 +14,71 @@ import mlflow
17
  import mlflow.sklearn
18
  from mlflow.tracking import MlflowClient
19
 
20
-
21
- PROJECT_ROOT = Path(__file__).resolve().parents[1]
22
- DEFAULT_TRACKING_URI = f"sqlite:///{(PROJECT_ROOT / 'artifacts' / 'mlflow.db').resolve()}"
23
- DEFAULT_MODEL_OUTPUT_PATH = PROJECT_ROOT / "artifacts" / "models" / "best_pipeline.joblib"
24
- DEFAULT_METADATA_OUTPUT_PATH = PROJECT_ROOT / "artifacts" / "models" / "best_pipeline_metadata.json"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
 
27
  def parse_args() -> argparse.Namespace:
28
- """Construit l'interface en ligne de commande du script d'export."""
29
  parser = argparse.ArgumentParser(
30
  description=(
31
- "Exporte un registered model MLflow vers artifacts/models/best_pipeline.joblib. "
32
- "Sans --registered-model, le script n'accepte qu'un seul registered model dans le registre."
33
  )
34
  )
35
  parser.add_argument(
36
  "--tracking-uri",
37
- default=DEFAULT_TRACKING_URI,
38
  help="Tracking URI MLflow. Par defaut: base SQLite locale du projet.",
39
  )
40
  parser.add_argument(
41
- "--registered-model",
 
 
 
 
 
42
  default=None,
43
- help="Nom du registered model MLflow a exporter.",
44
  )
45
  parser.add_argument(
46
- "--version",
47
  default=None,
48
- help="Version du registered model a exporter. Par defaut: derniere version disponible.",
49
  )
50
  parser.add_argument(
51
- "--output-model-path",
52
- default=str(DEFAULT_MODEL_OUTPUT_PATH),
53
- help="Chemin de sortie du pipeline joblib exporte.",
 
 
 
 
 
54
  )
55
  parser.add_argument(
56
- "--output-metadata-path",
57
- default=str(DEFAULT_METADATA_OUTPUT_PATH),
58
- help="Chemin de sortie des metadonnees JSON.",
59
  )
60
  return parser.parse_args()
61
 
@@ -64,73 +87,114 @@ def project_relative_path(path: Path) -> str:
64
  """Retourne un chemin relatif au projet si possible."""
65
  resolved = path.resolve()
66
  try:
67
- return str(resolved.relative_to(PROJECT_ROOT))
68
  except ValueError:
69
  return str(resolved)
70
 
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  def normalize_registered_model_names(models: list[Any]) -> list[str]:
73
  """Extrait et trie les noms de registered models MLflow."""
74
  return sorted(str(model.name) for model in models)
75
 
76
 
77
- def resolve_registered_model_name(available_names: list[str], requested_name: str | None = None) -> str:
78
- """Selectionne un registered model de maniere non ambigue.
 
 
 
 
 
79
 
80
- Args:
81
- available_names: Noms presents dans le registre MLflow.
82
- requested_name: Nom explicitement demande, si fourni.
83
 
84
- Returns:
85
- str: Nom du registered model retenu.
86
- """
87
- if requested_name:
 
 
 
 
88
  if requested_name not in available_names:
89
- available = ", ".join(available_names) if available_names else "aucun"
90
  raise ValueError(
91
- f"Registered model introuvable: {requested_name}. "
92
- f"Modeles disponibles: {available}."
93
  )
94
  return requested_name
95
 
96
- if not available_names:
 
97
  raise ValueError(
98
- "Aucun registered model MLflow trouvé. "
99
- "Sélectionnez d'abord un registered model avec --registered-model."
100
  )
101
-
102
- if len(available_names) > 1:
103
- available = ", ".join(available_names)
104
  raise ValueError(
105
- "Plusieurs registered models MLflow trouvés. "
106
- f"Sélectionnez explicitement un registered model avec --registered-model. "
107
- f"Modeles disponibles: {available}."
108
  )
109
-
110
- return available_names[0]
111
 
112
 
113
- def _version_sort_key(version: str) -> tuple[int, str]:
114
  """Produit une cle de tri robuste pour les versions MLflow."""
115
- value = str(version)
116
- return (int(value), value) if value.isdigit() else (-1, value)
117
 
118
 
119
- def resolve_model_version(versions: list[Any], requested_version: str | None = None) -> Any:
120
- """Selectionne la version a exporter pour un registered model donne."""
 
 
 
 
 
 
 
121
  if requested_version is not None:
122
  for version in versions:
123
  if str(version.version) == str(requested_version):
124
  return version
125
- available = ", ".join(str(version.version) for version in versions) if versions else "aucune"
126
  raise ValueError(
127
- f"Version introuvable: {requested_version}. Versions disponibles: {available}."
 
 
128
  )
129
 
130
  if not versions:
131
- raise ValueError("Aucune version disponible pour le registered model sélectionné.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
- return max(versions, key=lambda version: _version_sort_key(str(version.version)))
134
 
135
 
136
  def read_json_if_exists(path: Path) -> dict[str, Any]:
@@ -140,48 +204,49 @@ def read_json_if_exists(path: Path) -> dict[str, Any]:
140
  return json.loads(path.read_text(encoding="utf-8"))
141
 
142
 
143
- def json_ready(value: Any) -> Any:
144
- """Convertit recursivement les types Python en valeurs serialisables JSON."""
145
- if isinstance(value, dict):
146
- return {str(key): json_ready(item) for key, item in value.items()}
147
- if isinstance(value, list):
148
- return [json_ready(item) for item in value]
149
- if isinstance(value, tuple):
150
- return [json_ready(item) for item in value]
151
- if isinstance(value, Path):
152
- return str(value)
153
- if isinstance(value, datetime):
154
- return value.isoformat()
155
- return value
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
 
158
  def build_export_metadata(
159
  *,
160
  existing_metadata: dict[str, Any],
 
161
  registered_model_name: str,
162
  model_version: Any,
163
  tracking_uri: str,
164
  model_output_path: Path,
 
165
  source_run: Any | None,
166
  ) -> dict[str, Any]:
167
- """Construit les metadonnees de tracabilite de l'export local.
168
-
169
- Args:
170
- existing_metadata: Metadonnees deja presentes sur disque.
171
- registered_model_name: Nom du registered model exporte.
172
- model_version: Version MLflow exportee.
173
- tracking_uri: Tracking URI source.
174
- model_output_path: Chemin du joblib genere.
175
- source_run: Run MLflow source, si disponible.
176
-
177
- Returns:
178
- dict[str, Any]: Metadonnees consolidees de l'export.
179
- """
180
  metadata = dict(existing_metadata)
181
-
182
  metadata.update(
183
  {
184
- "artifact_role": "mlflow_registered_model_export",
185
  "registered_model_name": registered_model_name,
186
  "registered_model_version": str(model_version.version),
187
  "registered_model_stage": str(getattr(model_version, "current_stage", "None") or "None"),
@@ -191,8 +256,10 @@ def build_export_metadata(
191
  "tracking_uri": tracking_uri,
192
  "exported_at_utc": datetime.now(timezone.utc).isoformat(),
193
  "artifact_path": project_relative_path(model_output_path),
194
- "used_by_final_api": False,
195
- "consumer": "single_model_export_pipeline",
 
 
196
  }
197
  )
198
 
@@ -207,69 +274,163 @@ def build_export_metadata(
207
  return metadata
208
 
209
 
210
- def export_registered_model(
211
- *,
212
- tracking_uri: str,
213
- registered_model_name: str,
214
- model_version: Any,
215
- model_output_path: Path,
216
- ) -> None:
217
- """Charge un modele depuis MLflow et l'exporte en `joblib` local."""
218
- mlflow.set_tracking_uri(tracking_uri)
219
- model_uri = f"models:/{registered_model_name}/{model_version.version}"
220
- estimator = mlflow.sklearn.load_model(model_uri)
221
- model_output_path.parent.mkdir(parents=True, exist_ok=True)
222
- joblib.dump(estimator, model_output_path)
223
-
224
 
225
- def main() -> None:
226
- """Execute l'export du registered model depuis la CLI."""
227
- args = parse_args()
228
- tracking_uri = str(args.tracking_uri)
229
- model_output_path = Path(args.output_model_path).resolve()
230
- metadata_output_path = Path(args.output_metadata_path).resolve()
231
 
232
- mlflow.set_tracking_uri(tracking_uri)
233
- client = MlflowClient(tracking_uri=tracking_uri)
234
 
235
- registered_models = list(client.search_registered_models())
236
- available_names = normalize_registered_model_names(registered_models)
237
- registered_model_name = resolve_registered_model_name(
238
- available_names,
239
- requested_name=args.registered_model,
 
 
 
 
 
 
 
 
 
 
240
  )
241
-
242
- model_versions = list(client.search_model_versions(f"name = '{registered_model_name}'"))
243
- selected_version = resolve_model_version(model_versions, requested_version=args.version)
244
- source_run = client.get_run(selected_version.run_id) if getattr(selected_version, "run_id", None) else None
245
-
246
- export_registered_model(
247
- tracking_uri=tracking_uri,
248
  registered_model_name=registered_model_name,
249
- model_version=selected_version,
250
- model_output_path=model_output_path,
251
  )
 
252
 
253
- existing_metadata = read_json_if_exists(metadata_output_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  export_metadata = build_export_metadata(
255
  existing_metadata=existing_metadata,
 
256
  registered_model_name=registered_model_name,
257
  model_version=selected_version,
258
  tracking_uri=tracking_uri,
259
- model_output_path=model_output_path,
 
260
  source_run=source_run,
261
  )
262
- metadata_output_path.parent.mkdir(parents=True, exist_ok=True)
263
- metadata_output_path.write_text(
264
  json.dumps(json_ready(export_metadata), indent=2, ensure_ascii=True),
265
  encoding="utf-8",
266
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
- print(f"Registered model exporté : {registered_model_name}")
269
- print(f"Version exportée : {selected_version.version}")
270
- print(f"Run source : {getattr(selected_version, 'run_id', None)}")
271
- print(f"Pipeline joblib : {model_output_path}")
272
- print(f"Metadonnées : {metadata_output_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
 
275
  if __name__ == "__main__":
 
1
+ """Promeut les deux registered models runtime depuis MLflow vers le disque."""
 
 
 
 
2
 
3
  from __future__ import annotations
4
 
5
  import argparse
6
+ from dataclasses import replace
7
  from datetime import datetime, timezone
8
  import json
9
  from pathlib import Path
 
14
  import mlflow.sklearn
15
  from mlflow.tracking import MlflowClient
16
 
17
+ from scripts.mlflow_config import normalize_tracking_uri
18
+ from scripts.runtime_model_specs import (
19
+ DEFAULT_MLFLOW_TRACKING_URI,
20
+ DEFAULT_MODELS_DIR,
21
+ HISTORICAL_RUNTIME_MODEL_SPEC,
22
+ RuntimeModelSpec,
23
+ SIMULATION_RUNTIME_MODEL_SPEC,
24
+ )
25
+
26
+
27
+ REQUIRED_RUNTIME_METADATA_FIELDS = {
28
+ "runtime_model_role",
29
+ "registered_model_name",
30
+ "registered_model_version",
31
+ "registered_model_run_id",
32
+ "model_uri",
33
+ "tracking_uri",
34
+ "exported_at_utc",
35
+ "artifact_path",
36
+ "metadata_path",
37
+ }
38
 
39
 
40
def parse_args() -> argparse.Namespace:
    """Build the command-line interface of the runtime promotion script.

    Options are declared in a table and registered in order, so the generated
    help output lists them in the same sequence as before.

    Returns:
        argparse.Namespace: Parsed command-line arguments.
    """
    cli = argparse.ArgumentParser(
        description=(
            "Promote the two MLflow registered models used by the FastAPI runtime "
            "and export them to artifacts/models/."
        )
    )
    option_table = (
        (
            "--tracking-uri",
            {
                "default": DEFAULT_MLFLOW_TRACKING_URI,
                "help": "Tracking URI MLflow. Par defaut: base SQLite locale du projet.",
            },
        ),
        (
            "--models-dir",
            {
                "default": str(DEFAULT_MODELS_DIR),
                "help": "Dossier cible pour les artefacts runtime exportes.",
            },
        ),
        (
            "--historical-registered-model",
            {
                "default": None,
                "help": "Nom du registered model historique a exporter.",
            },
        ),
        (
            "--historical-version",
            {
                "default": None,
                "help": "Version MLflow du modele historique a exporter.",
            },
        ),
        (
            "--simulation-registered-model",
            {
                "default": None,
                "help": "Nom du registered model local/simulation a exporter.",
            },
        ),
        (
            "--simulation-version",
            {
                "default": None,
                "help": "Version MLflow du modele local/simulation a exporter.",
            },
        ),
        (
            "--json",
            {
                "action": "store_true",
                "help": "Imprime le resume de promotion au format JSON.",
            },
        ),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
84
 
 
87
  """Retourne un chemin relatif au projet si possible."""
88
  resolved = path.resolve()
89
  try:
90
+ return str(resolved.relative_to(Path(__file__).resolve().parents[1]))
91
  except ValueError:
92
  return str(resolved)
93
 
94
 
95
def json_ready(value: Any) -> Any:
    """Recursively convert Python values into strictly JSON-serialisable ones.

    Conversions applied:
      * dict           -> dict with keys coerced to ``str``
      * list / tuple   -> list
      * set / frozenset -> list, ordered by ``repr`` for a stable output
      * Path           -> ``str(path)``
      * datetime       -> ISO 8601 string via ``isoformat()``
      * non-finite float (NaN, +/-Infinity) -> ``None``

    Any other value is returned unchanged.

    Args:
        value: Arbitrary Python value, possibly nested.

    Returns:
        Any: Equivalent structure that ``json.dumps`` can emit as valid JSON.
    """
    # Local import: this block cannot rely on a file-level ``import math``.
    import math

    if isinstance(value, dict):
        return {str(key): json_ready(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [json_ready(item) for item in value]
    if isinstance(value, (set, frozenset)):
        # Sets have no order; sort on repr so repeated exports are identical.
        return [json_ready(item) for item in sorted(value, key=repr)]
    if isinstance(value, Path):
        return str(value)
    if isinstance(value, datetime):
        return value.isoformat()
    if isinstance(value, float) and not math.isfinite(value):
        # json.dumps would write NaN / Infinity, which strict JSON parsers
        # reject; null is the interoperable representation.
        return None
    return value
108
+
109
+
110
def normalize_registered_model_names(models: list[Any]) -> list[str]:
    """Return the names of the given MLflow registered models, sorted.

    Args:
        models: Objects exposing a ``name`` attribute.

    Returns:
        list[str]: Each ``name`` coerced to ``str``, in ascending order.
    """
    names = [str(entry.name) for entry in models]
    names.sort()
    return names
113
 
114
 
115
def with_models_dir(spec: RuntimeModelSpec, models_dir: Path) -> RuntimeModelSpec:
    """Return a copy of ``spec`` whose output files live under ``models_dir``.

    Only the directory part changes: the file names of the model and metadata
    outputs are kept as they are in ``spec``.

    Args:
        spec: Runtime model specification to copy.
        models_dir: Directory that should hold the exported files.

    Returns:
        RuntimeModelSpec: New specification with relocated output paths.
    """
    relocated_paths = {
        "output_model_path": models_dir / spec.output_model_path.name,
        "output_metadata_path": models_dir / spec.output_metadata_path.name,
    }
    return replace(spec, **relocated_paths)
122
 
 
 
 
123
 
124
def resolve_registered_model_name_for_role(
    *,
    role_spec: RuntimeModelSpec,
    available_names: list[str],
    requested_name: str | None = None,
) -> str:
    """Pick the registered model to promote for a runtime role.

    An explicitly requested name wins, provided it is present in
    ``available_names``. Otherwise the name declared on ``role_spec`` must
    appear exactly once in ``available_names``.

    Args:
        role_spec: Specification providing ``role`` and ``registered_model_name``.
        available_names: Registered model names known to the registry.
        requested_name: Optional explicit name to use.

    Returns:
        str: Name of the registered model to promote.

    Raises:
        ValueError: If the requested name is unknown, or if the expected name
            is absent from, or repeated in, ``available_names``.
    """
    if requested_name is None:
        expected_name = role_spec.registered_model_name
        candidates = [candidate for candidate in available_names if candidate == expected_name]
        if len(candidates) == 1:
            return candidates[0]
        if candidates:
            raise ValueError(
                f"Multiple candidate registered models found for role {role_spec.role!r}. "
                f"Please pass --{role_spec.role}-registered-model."
            )
        raise ValueError(
            f"No MLflow registered model found for role {role_spec.role!r}. "
            f"Expected one of: {role_spec.registered_model_name}."
        )

    if requested_name in available_names:
        return requested_name

    available = ", ".join(available_names) if available_names else "none"
    raise ValueError(
        f"Requested registered model {requested_name!r} for role "
        f"{role_spec.role!r} was not found. Available registered models: {available}."
    )
 
152
 
153
 
154
+ def _version_sort_key(version: Any) -> tuple[int, str]:
155
  """Produit une cle de tri robuste pour les versions MLflow."""
156
+ raw_value = str(getattr(version, "version", version))
157
+ return (int(raw_value), raw_value) if raw_value.isdigit() else (-1, raw_value)
158
 
159
 
160
def resolve_model_version_for_role(
    versions: list[Any],
    *,
    role_spec: RuntimeModelSpec,
    registered_model_name: str,
    requested_version: str | None = None,
    allow_latest_version: bool = False,
) -> Any:
    """Strictly select the model version to export for a runtime role.

    Selection rules, in order:
      1. If ``requested_version`` is given, return the first version whose
         ``version`` attribute matches it as a string, else fail.
      2. If there is no version at all, fail.
      3. If ``allow_latest_version`` is true, return the highest version
         according to ``_version_sort_key``.
      4. If exactly one version exists, return it; otherwise fail and ask for
         an explicit version.

    Args:
        versions: Version objects exposing a ``version`` attribute.
        role_spec: Specification providing the ``role`` used in messages.
        registered_model_name: Registered model name, used in messages.
        requested_version: Optional explicit version to select.
        allow_latest_version: Whether to fall back to the highest version.

    Returns:
        Any: The selected version object.

    Raises:
        ValueError: If no version can be selected unambiguously.
    """
    if requested_version is None:
        if not versions:
            raise ValueError(
                f"Registered model exists but no version could be resolved for role "
                f"{role_spec.role!r}."
            )
        if allow_latest_version:
            return max(versions, key=_version_sort_key)
        if len(versions) == 1:
            return versions[0]
        ordered = sorted(versions, key=_version_sort_key)
        available = ", ".join(str(candidate.version) for candidate in ordered)
        raise ValueError(
            f"Multiple versions are available for role {role_spec.role!r} and "
            f"registered model {registered_model_name!r}. "
            f"Please pass --{role_spec.role}-version. Available versions: {available}."
        )

    not_found = object()
    wanted = str(requested_version)
    # Lazy scan: stops at the first matching version.
    match = next(
        (candidate for candidate in versions if str(candidate.version) == wanted),
        not_found,
    )
    if match is not not_found:
        return match

    available = ", ".join(str(candidate.version) for candidate in versions) if versions else "none"
    raise ValueError(
        f"Requested version {requested_version!r} for role {role_spec.role!r} and "
        f"registered model {registered_model_name!r} does not exist. "
        f"Available versions: {available}."
    )
198
 
199
 
200
  def read_json_if_exists(path: Path) -> dict[str, Any]:
 
204
  return json.loads(path.read_text(encoding="utf-8"))
205
 
206
 
207
def export_registered_model(
    *,
    tracking_uri: str,
    registered_model_name: str,
    model_version: Any,
    model_output_path: Path,
) -> None:
    """Load a registered model from MLflow and dump it locally with joblib.

    Args:
        tracking_uri: MLflow tracking URI, set globally before loading.
        registered_model_name: Name of the registered model to load.
        model_version: Object whose ``version`` attribute selects the version.
        model_output_path: Destination file; parent directories are created
            if they do not exist.
    """
    mlflow.set_tracking_uri(tracking_uri)
    loaded_pipeline = mlflow.sklearn.load_model(
        f"models:/{registered_model_name}/{model_version.version}"
    )
    destination_dir = model_output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    joblib.dump(loaded_pipeline, model_output_path)
220
+
221
+
222
def validate_exported_artifact(model_output_path: Path) -> None:
    """Check that the exported joblib artifact exists and can be reloaded.

    Args:
        model_output_path: Path of the exported artifact.

    Raises:
        RuntimeError: If the path does not exist or ``joblib.load`` fails; in
            the latter case the original exception is chained as the cause.
    """
    failure_message = f"Exported artifact is missing or cannot be loaded: {model_output_path}"
    if model_output_path.exists():
        try:
            joblib.load(model_output_path)
        except Exception as load_error:  # pragma: no cover - defensive branch
            raise RuntimeError(failure_message) from load_error
        return
    raise RuntimeError(failure_message)
232
 
233
 
234
  def build_export_metadata(
235
  *,
236
  existing_metadata: dict[str, Any],
237
+ role_spec: RuntimeModelSpec,
238
  registered_model_name: str,
239
  model_version: Any,
240
  tracking_uri: str,
241
  model_output_path: Path,
242
+ metadata_output_path: Path,
243
  source_run: Any | None,
244
  ) -> dict[str, Any]:
245
+ """Construit les metadonnees de tracabilite de l'export runtime."""
 
 
 
 
 
 
 
 
 
 
 
 
246
  metadata = dict(existing_metadata)
 
247
  metadata.update(
248
  {
249
+ "runtime_model_role": role_spec.role,
250
  "registered_model_name": registered_model_name,
251
  "registered_model_version": str(model_version.version),
252
  "registered_model_stage": str(getattr(model_version, "current_stage", "None") or "None"),
 
256
  "tracking_uri": tracking_uri,
257
  "exported_at_utc": datetime.now(timezone.utc).isoformat(),
258
  "artifact_path": project_relative_path(model_output_path),
259
+ "metadata_path": project_relative_path(metadata_output_path),
260
+ "output_path": project_relative_path(model_output_path),
261
+ "output_metadata_path": project_relative_path(metadata_output_path),
262
+ "role": role_spec.role,
263
  }
264
  )
265
 
 
274
  return metadata
275
 
276
 
277
def validate_runtime_metadata(metadata: dict[str, Any], *, role_spec: RuntimeModelSpec) -> None:
    """Check that exported metadata is consistent for the runtime.

    Every field in ``REQUIRED_RUNTIME_METADATA_FIELDS`` must be present with a
    truthy value, and ``runtime_model_role`` must equal ``role_spec.role``.

    Args:
        metadata: Metadata dictionary to validate.
        role_spec: Specification providing the expected ``role``.

    Raises:
        RuntimeError: If required fields are missing or empty, or if the
            recorded role differs from the expected one.
    """
    failure_prefix = f"Metadata validation failed for role {role_spec.role!r}. "

    absent = [
        field_name
        for field_name in sorted(REQUIRED_RUNTIME_METADATA_FIELDS)
        if not metadata.get(field_name)
    ]
    if absent:
        raise RuntimeError(failure_prefix + f"Missing fields: {', '.join(absent)}.")

    recorded_role = metadata.get("runtime_model_role")
    if recorded_role != role_spec.role:
        raise RuntimeError(
            failure_prefix + f"Unexpected runtime_model_role={recorded_role!r}."
        )
 
293
 
 
 
294
 
295
def promote_single_registered_model(
    *,
    client: MlflowClient,
    tracking_uri: str,
    role_spec: RuntimeModelSpec,
    available_names: list[str],
    requested_name: str | None = None,
    requested_version: str | None = None,
    allow_latest_version: bool = False,
) -> dict[str, Any]:
    """Promote one runtime registered model from MLflow to local files.

    Steps: resolve the registered model name and version, export the model as
    a joblib artifact, check the artifact can be reloaded, build the export
    metadata, validate it, then write it next to the artifact.

    The metadata is validated before it is written, so a validation failure
    never leaves an invalid metadata file on disk.

    Args:
        client: MLflow client used to query versions and the source run.
        tracking_uri: MLflow tracking URI used for the export.
        role_spec: Specification of the runtime role (role, output paths).
        available_names: Registered model names known to the registry.
        requested_name: Optional explicit registered model name.
        requested_version: Optional explicit model version.
        allow_latest_version: Fall back to the highest version when several
            versions exist and none was requested.

    Returns:
        dict[str, Any]: Summary with the role, registered model name, version,
        run id, model URI and project-relative artifact and metadata paths.

    Raises:
        ValueError: If the model name or version cannot be resolved.
        RuntimeError: If the export, the artifact check, or the metadata
            validation fails.
    """
    registered_model_name = resolve_registered_model_name_for_role(
        role_spec=role_spec,
        available_names=available_names,
        requested_name=requested_name,
    )
    versions = list(client.search_model_versions(f"name = '{registered_model_name}'"))
    selected_version = resolve_model_version_for_role(
        versions,
        role_spec=role_spec,
        registered_model_name=registered_model_name,
        requested_version=requested_version,
        allow_latest_version=allow_latest_version,
    )
    source_run = client.get_run(selected_version.run_id) if getattr(selected_version, "run_id", None) else None

    try:
        export_registered_model(
            tracking_uri=tracking_uri,
            registered_model_name=registered_model_name,
            model_version=selected_version,
            model_output_path=role_spec.output_model_path,
        )
    except Exception as exc:  # pragma: no cover - defensive branch
        raise RuntimeError(
            f"Export failed for role {role_spec.role!r} and model {registered_model_name!r}."
        ) from exc

    validate_exported_artifact(role_spec.output_model_path)
    existing_metadata = read_json_if_exists(role_spec.output_metadata_path)
    export_metadata = build_export_metadata(
        existing_metadata=existing_metadata,
        role_spec=role_spec,
        registered_model_name=registered_model_name,
        model_version=selected_version,
        tracking_uri=tracking_uri,
        model_output_path=role_spec.output_model_path,
        metadata_output_path=role_spec.output_metadata_path,
        source_run=source_run,
    )

    # Validate first: invalid metadata must not be persisted and then picked
    # up by a later reader of the metadata file.
    validate_runtime_metadata(export_metadata, role_spec=role_spec)

    role_spec.output_metadata_path.parent.mkdir(parents=True, exist_ok=True)
    role_spec.output_metadata_path.write_text(
        json.dumps(json_ready(export_metadata), indent=2, ensure_ascii=True),
        encoding="utf-8",
    )

    return {
        "role": role_spec.role,
        "registered_model_name": registered_model_name,
        "registered_model_version": str(selected_version.version),
        "registered_model_run_id": getattr(selected_version, "run_id", None),
        "model_uri": f"models:/{registered_model_name}/{selected_version.version}",
        "artifact_path": project_relative_path(role_spec.output_model_path),
        "metadata_path": project_relative_path(role_spec.output_metadata_path),
    }
361
+
362
+
363
def promote_registered_models(
    *,
    tracking_uri: str = DEFAULT_MLFLOW_TRACKING_URI,
    models_dir: str | Path = DEFAULT_MODELS_DIR,
    historical_registered_model: str | None = None,
    historical_version: str | None = None,
    simulation_registered_model: str | None = None,
    simulation_version: str | None = None,
    allow_latest_version: bool = False,
) -> dict[str, Any]:
    """Promote both runtime registered models from MLflow.

    The historical model is promoted first, then the simulation model. Both
    are exported under the resolved ``models_dir``.

    Args:
        tracking_uri: MLflow tracking URI; normalised before use.
        models_dir: Target directory for the exported runtime artifacts.
        historical_registered_model: Optional explicit historical model name.
        historical_version: Optional explicit historical model version.
        simulation_registered_model: Optional explicit simulation model name.
        simulation_version: Optional explicit simulation model version.
        allow_latest_version: Passed through to each single-model promotion.

    Returns:
        dict[str, Any]: The normalised tracking URI, the project-relative
        models directory and one summary per role under the ``historical``
        and ``simulation`` keys.
    """
    effective_uri = normalize_tracking_uri(tracking_uri)
    target_dir = Path(models_dir).resolve()

    # One entry per runtime role: (summary key, relocated spec, name, version).
    promotion_plan = (
        (
            "historical",
            with_models_dir(HISTORICAL_RUNTIME_MODEL_SPEC, target_dir),
            historical_registered_model,
            historical_version,
        ),
        (
            "simulation",
            with_models_dir(SIMULATION_RUNTIME_MODEL_SPEC, target_dir),
            simulation_registered_model,
            simulation_version,
        ),
    )

    mlflow.set_tracking_uri(effective_uri)
    registry_client = MlflowClient(tracking_uri=effective_uri)
    known_names = normalize_registered_model_names(
        list(registry_client.search_registered_models())
    )

    summaries: dict[str, Any] = {}
    for summary_key, spec, wanted_name, wanted_version in promotion_plan:
        summaries[summary_key] = promote_single_registered_model(
            client=registry_client,
            tracking_uri=effective_uri,
            role_spec=spec,
            available_names=known_names,
            requested_name=wanted_name,
            requested_version=wanted_version,
            allow_latest_version=allow_latest_version,
        )

    return {
        "tracking_uri": effective_uri,
        "models_dir": project_relative_path(target_dir),
        "historical": summaries["historical"],
        "simulation": summaries["simulation"],
    }
408
 
409
+
410
def main() -> None:
    """Run the runtime promotion from the command line.

    Prints the whole summary as JSON when ``--json`` is set, otherwise one
    ``[promotion]`` line per role (historical, then simulation).
    """
    cli_args = parse_args()
    promotion_kwargs = {
        "tracking_uri": str(cli_args.tracking_uri),
        "models_dir": cli_args.models_dir,
        "historical_registered_model": cli_args.historical_registered_model,
        "historical_version": cli_args.historical_version,
        "simulation_registered_model": cli_args.simulation_registered_model,
        "simulation_version": cli_args.simulation_version,
    }
    report = promote_registered_models(**promotion_kwargs)

    if not cli_args.json:
        for role_key in ("historical", "simulation"):
            entry = report[role_key]
            print(
                "[promotion] "
                f"role={entry['role']} "
                f"registered_model={entry['registered_model_name']} "
                f"version={entry['registered_model_version']} "
                f"artifact={entry['artifact_path']}"
            )
        return

    print(json.dumps(report, indent=2, ensure_ascii=True))
434
 
435
 
436
  if __name__ == "__main__":
scripts/run_full_pipeline.py CHANGED
@@ -1,31 +1,122 @@
1
- """Orchestre la chaine locale du projet, de la preparation a la validation."""
2
 
3
  from __future__ import annotations
4
 
5
  import argparse
6
  import json
 
 
7
  from pathlib import Path
8
  import sys
9
 
 
 
 
10
 
11
  PROJECT_ROOT = Path(__file__).resolve().parents[1]
12
  if str(PROJECT_ROOT) not in sys.path:
13
  sys.path.insert(0, str(PROJECT_ROOT))
14
 
15
- from scripts.pipeline_utils import ensure_paths_exist, execute_notebook, relative_to_project
 
 
 
 
 
 
16
  from scripts.run_preparation import run_preparation
17
  from scripts.train_historical_model import train_historical_model
18
  from scripts.train_simulation_model import train_simulation_model
19
  from scripts.validate_runtime import validate_runtime
20
 
21
 
22
- EXPERIENCE_2_NOTEBOOK_PATH = Path("notebooks/experience_2.ipynb")
23
- EXPERIENCE_2_OUTPUTS = [
24
- Path("artifacts/experiments/experience_2/dataset_series_temporelles.csv"),
25
- Path("artifacts/experiments/experience_2/model_results.csv"),
26
- Path("artifacts/experiments/experience_2/experience_2_summary.csv"),
27
- ]
28
- EXPERIENCE_3_NOTEBOOK_PATH = Path("notebooks/experience_3.ipynb")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  def parse_args() -> argparse.Namespace:
@@ -38,21 +129,11 @@ def parse_args() -> argparse.Namespace:
38
  action="store_true",
39
  help="Reuse the existing preparation outputs instead of re-executing preparation.ipynb.",
40
  )
41
- parser.add_argument(
42
- "--run-experience-2",
43
- action="store_true",
44
- help="Optionally execute the abandoned complementary temporal notebook.",
45
- )
46
  parser.add_argument(
47
  "--skip-runtime-validation",
48
  action="store_true",
49
  help="Skip the final smoke test against the runtime service.",
50
  )
51
- parser.add_argument(
52
- "--run-experience-3",
53
- action="store_true",
54
- help="Also execute notebooks/experience_3.ipynb after the artifacts are rebuilt.",
55
- )
56
  parser.add_argument(
57
  "--reuse-simulation-artifact",
58
  action="store_true",
@@ -75,6 +156,21 @@ def parse_args() -> argparse.Namespace:
75
  default="python3",
76
  help="Jupyter kernel used to execute notebook-backed stages.",
77
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  parser.add_argument(
79
  "--json",
80
  action="store_true",
@@ -86,74 +182,78 @@ def parse_args() -> argparse.Namespace:
86
  def run_full_pipeline(
87
  *,
88
  skip_preparation: bool = False,
89
- run_experience_2: bool = False,
90
  skip_runtime_validation: bool = False,
91
- run_experience_3: bool = False,
92
  reuse_simulation_artifact: bool = False,
93
  simulation_sample_size: int = 200_000,
94
  notebook_timeout_seconds: int = 7200,
95
  kernel_name: str = "python3",
 
 
 
96
  ) -> dict[str, object]:
97
  """Execute les principales etapes de regeneration des artefacts.
98
 
99
  Args:
100
  skip_preparation: Saute `preparation.ipynb` si les sorties existent deja.
101
- run_experience_2: Execute explicitement le notebook temporel abandonne.
102
  skip_runtime_validation: Saute le smoke test final.
103
- run_experience_3: Execute aussi le notebook de verification de stack.
104
  reuse_simulation_artifact: Reutilise le modele local existant au lieu de le reentrainer.
105
  simulation_sample_size: Taille d'echantillon pour le modele local.
106
  notebook_timeout_seconds: Timeout applique a chaque notebook execute.
107
  kernel_name: Kernel Jupyter a utiliser.
 
 
 
108
 
109
  Returns:
110
  dict[str, object]: Resume des etapes executees et des artefacts verifies.
111
  """
112
- results: dict[str, object] = {}
 
 
 
 
 
 
113
 
114
  if not skip_preparation:
115
  results["preparation"] = run_preparation(
116
  timeout_seconds=notebook_timeout_seconds,
117
  kernel_name=kernel_name,
118
  )
 
 
119
 
120
  results["historical_model"] = train_historical_model(
 
121
  cv_splits=4,
122
  )
123
 
124
- if run_experience_2:
125
- print(f"[experience_2] Executing {relative_to_project(EXPERIENCE_2_NOTEBOOK_PATH)}")
126
- execute_notebook(
127
- EXPERIENCE_2_NOTEBOOK_PATH,
128
- timeout_seconds=notebook_timeout_seconds,
129
- kernel_name=kernel_name,
130
- )
131
- resolved_outputs = ensure_paths_exist(EXPERIENCE_2_OUTPUTS, label="experience_2 outputs")
132
- print("[experience_2] Outputs validated")
133
- results["experience_2"] = {
134
- "notebook": relative_to_project(EXPERIENCE_2_NOTEBOOK_PATH),
135
- "outputs": [relative_to_project(path) for path in resolved_outputs],
136
- }
137
-
138
  results["simulation_model"] = train_simulation_model(
139
  force_retrain=not reuse_simulation_artifact,
140
  save_artifact=True,
141
  sample_size=simulation_sample_size,
 
 
 
 
 
 
 
142
  )
143
-
144
- if run_experience_3:
145
- print(f"[experience_3] Executing {relative_to_project(EXPERIENCE_3_NOTEBOOK_PATH)}")
146
- execute_notebook(
147
- EXPERIENCE_3_NOTEBOOK_PATH,
148
- timeout_seconds=notebook_timeout_seconds,
149
- kernel_name=kernel_name,
150
- )
151
- results["experience_3"] = {
152
- "notebook": relative_to_project(EXPERIENCE_3_NOTEBOOK_PATH),
153
- }
154
 
155
  if not skip_runtime_validation:
156
  results["runtime_validation"] = validate_runtime()
 
 
 
 
 
 
 
 
 
 
 
157
 
158
  return results
159
 
@@ -163,13 +263,14 @@ def main() -> None:
163
  args = parse_args()
164
  summary = run_full_pipeline(
165
  skip_preparation=args.skip_preparation,
166
- run_experience_2=args.run_experience_2,
167
  skip_runtime_validation=args.skip_runtime_validation,
168
- run_experience_3=args.run_experience_3,
169
  reuse_simulation_artifact=args.reuse_simulation_artifact,
170
  simulation_sample_size=args.simulation_sample_size,
171
  notebook_timeout_seconds=args.notebook_timeout_seconds,
172
  kernel_name=args.kernel_name,
 
 
 
173
  )
174
  if args.json:
175
  print(json.dumps(summary, indent=2, ensure_ascii=True))
 
1
+ """Orchestre la chaine locale officielle, de la preparation a la validation."""
2
 
3
  from __future__ import annotations
4
 
5
  import argparse
6
  import json
7
+ import math
8
+ from numbers import Real
9
  from pathlib import Path
10
  import sys
11
 
12
+ import mlflow
13
+ from mlflow.tracking import MlflowClient
14
+
15
 
16
  PROJECT_ROOT = Path(__file__).resolve().parents[1]
17
  if str(PROJECT_ROOT) not in sys.path:
18
  sys.path.insert(0, str(PROJECT_ROOT))
19
 
20
+ from scripts.mlflow_config import (
21
+ DEFAULT_MLFLOW_TRACKING_URI,
22
+ FULL_PIPELINE_EXPERIMENT_NAME,
23
+ experiment_artifact_location,
24
+ normalize_tracking_uri,
25
+ )
26
+ from scripts.promote_registered_model import promote_registered_models
27
  from scripts.run_preparation import run_preparation
28
  from scripts.train_historical_model import train_historical_model
29
  from scripts.train_simulation_model import train_simulation_model
30
  from scripts.validate_runtime import validate_runtime
31
 
32
 
33
def _ensure_full_pipeline_experiment(tracking_uri: str) -> None:
    """Select the MLflow experiment used for full-pipeline runs, creating it if missing.

    Args:
        tracking_uri: MLflow tracking URI applied to the fluent API and to the client.
    """
    mlflow.set_tracking_uri(tracking_uri)

    # Close any run that is still active before switching experiments.
    while mlflow.active_run() is not None:
        mlflow.end_run()

    experiment_name = FULL_PIPELINE_EXPERIMENT_NAME
    tracking_client = MlflowClient(tracking_uri=tracking_uri)
    if tracking_client.get_experiment_by_name(experiment_name) is None:
        artifact_root = experiment_artifact_location(
            experiment_name,
            tracking_uri=tracking_uri,
        )
        tracking_client.create_experiment(experiment_name, artifact_location=artifact_root)
    mlflow.set_experiment(experiment_name)
50
+
51
+
52
+ def _log_numeric_metrics(prefix: str, metrics: object) -> None:
53
+ """Journalise les metriques numeriques disponibles dans un dictionnaire."""
54
+ if not isinstance(metrics, dict):
55
+ return
56
+
57
+ for metric_name, metric_value in metrics.items():
58
+ if isinstance(metric_value, bool) or not isinstance(metric_value, Real):
59
+ continue
60
+ numeric_value = float(metric_value)
61
+ if math.isfinite(numeric_value):
62
+ mlflow.log_metric(f"{prefix}_{metric_name}", numeric_value)
63
+
64
+
65
+ def _log_param_if_present(name: str, value: object) -> None:
66
+ """Journalise un parametre MLflow seulement s'il est renseigne."""
67
+ if value is not None:
68
+ mlflow.log_param(name, value)
69
+
70
+
71
def log_pipeline_summary_to_mlflow(
    summary: dict[str, object],
    *,
    tracking_uri: str,
    skip_preparation: bool,
    skip_runtime_validation: bool,
    reuse_simulation_artifact: bool,
    simulation_sample_size: int,
) -> dict[str, str]:
    """Record one execution of `run_full_pipeline.py` as an MLflow run.

    Args:
        summary: Per-stage results of the pipeline; stored as `pipeline_summary.json`.
        tracking_uri: MLflow tracking URI; normalized before use.
        skip_preparation: Logged as the `skip_preparation` run parameter.
        skip_runtime_validation: Logged as the `skip_runtime_validation` run parameter.
        reuse_simulation_artifact: Logged as the `reuse_simulation_artifact` run parameter.
        simulation_sample_size: Logged as the `simulation_sample_size` run parameter.

    Returns:
        dict[str, str]: Experiment name, id of the created run and the resolved tracking URI.
    """
    resolved_tracking_uri = normalize_tracking_uri(tracking_uri)
    _ensure_full_pipeline_experiment(resolved_tracking_uri)
    # JSON round-trip: values json cannot encode are converted with str() (default=str),
    # so the summary can be handed to mlflow.log_dict.
    serializable_summary = json.loads(json.dumps(summary, ensure_ascii=True, default=str))

    with mlflow.start_run(run_name=FULL_PIPELINE_EXPERIMENT_NAME) as run:
        mlflow.log_param("entrypoint", "scripts/run_full_pipeline.py")
        mlflow.log_param("skip_preparation", bool(skip_preparation))
        mlflow.log_param("skip_runtime_validation", bool(skip_runtime_validation))
        mlflow.log_param("reuse_simulation_artifact", bool(reuse_simulation_artifact))
        mlflow.log_param("simulation_sample_size", int(simulation_sample_size))

        # Both model stages expose the same summary keys; log them under a role prefix.
        for role in ("historical", "simulation"):
            model_summary = serializable_summary.get(f"{role}_model", {})
            if not isinstance(model_summary, dict):
                continue
            _log_param_if_present(
                f"{role}_registered_model",
                model_summary.get("registered_model_name"),
            )
            _log_param_if_present(
                f"{role}_registered_model_version",
                model_summary.get("registered_model_version"),
            )
            _log_numeric_metrics(role, model_summary.get("metrics"))

        runtime_validation = serializable_summary.get("runtime_validation", {})
        if isinstance(runtime_validation, dict):
            validation_skipped = bool(runtime_validation.get("skipped", False))
            mlflow.log_param("runtime_validation_skipped", validation_skipped)
            # A skipped stage carries no "status" key; falling back to "executed" there
            # would contradict the `runtime_validation_skipped` parameter logged above.
            fallback_status = "skipped" if validation_skipped else "executed"
            _log_param_if_present(
                "runtime_validation_status",
                runtime_validation.get("status", fallback_status),
            )

        mlflow.log_dict(serializable_summary, "pipeline_summary.json")
        return {
            "experiment_name": FULL_PIPELINE_EXPERIMENT_NAME,
            "run_id": run.info.run_id,
            "tracking_uri": resolved_tracking_uri,
        }
120
 
121
 
122
  def parse_args() -> argparse.Namespace:
 
129
  action="store_true",
130
  help="Reuse the existing preparation outputs instead of re-executing preparation.ipynb.",
131
  )
 
 
 
 
 
132
  parser.add_argument(
133
  "--skip-runtime-validation",
134
  action="store_true",
135
  help="Skip the final smoke test against the runtime service.",
136
  )
 
 
 
 
 
137
  parser.add_argument(
138
  "--reuse-simulation-artifact",
139
  action="store_true",
 
156
  default="python3",
157
  help="Jupyter kernel used to execute notebook-backed stages.",
158
  )
159
+ parser.add_argument(
160
+ "--tracking-uri",
161
+ default=DEFAULT_MLFLOW_TRACKING_URI,
162
+ help="Tracking URI MLflow partage entre entrainement et promotion.",
163
+ )
164
+ parser.add_argument(
165
+ "--historical-version",
166
+ default=None,
167
+ help="Version MLflow historique a promouvoir. Par defaut, le pipeline prend la derniere version.",
168
+ )
169
+ parser.add_argument(
170
+ "--simulation-version",
171
+ default=None,
172
+ help="Version MLflow simulation a promouvoir. Par defaut, le pipeline prend la derniere version.",
173
+ )
174
  parser.add_argument(
175
  "--json",
176
  action="store_true",
 
182
  def run_full_pipeline(
183
  *,
184
  skip_preparation: bool = False,
 
185
  skip_runtime_validation: bool = False,
 
186
  reuse_simulation_artifact: bool = False,
187
  simulation_sample_size: int = 200_000,
188
  notebook_timeout_seconds: int = 7200,
189
  kernel_name: str = "python3",
190
+ tracking_uri: str = DEFAULT_MLFLOW_TRACKING_URI,
191
+ historical_version: str | None = None,
192
+ simulation_version: str | None = None,
193
  ) -> dict[str, object]:
194
  """Execute les principales etapes de regeneration des artefacts.
195
 
196
  Args:
197
  skip_preparation: Saute `preparation.ipynb` si les sorties existent deja.
 
198
  skip_runtime_validation: Saute le smoke test final.
 
199
  reuse_simulation_artifact: Reutilise le modele local existant au lieu de le reentrainer.
200
  simulation_sample_size: Taille d'echantillon pour le modele local.
201
  notebook_timeout_seconds: Timeout applique a chaque notebook execute.
202
  kernel_name: Kernel Jupyter a utiliser.
203
+ tracking_uri: Tracking URI MLflow utilise pour l'entrainement et la promotion.
204
+ historical_version: Version historique a promouvoir, ou derniere version si absent.
205
+ simulation_version: Version simulation a promouvoir, ou derniere version si absent.
206
 
207
  Returns:
208
  dict[str, object]: Resume des etapes executees et des artefacts verifies.
209
  """
210
+ tracking_uri = normalize_tracking_uri(tracking_uri)
211
+ results: dict[str, object] = {
212
+ "mlflow": {
213
+ "tracking_uri": tracking_uri,
214
+ "pipeline_experiment": FULL_PIPELINE_EXPERIMENT_NAME,
215
+ }
216
+ }
217
 
218
  if not skip_preparation:
219
  results["preparation"] = run_preparation(
220
  timeout_seconds=notebook_timeout_seconds,
221
  kernel_name=kernel_name,
222
  )
223
+ else:
224
+ results["preparation"] = {"skipped": True}
225
 
226
  results["historical_model"] = train_historical_model(
227
+ tracking_uri=tracking_uri,
228
  cv_splits=4,
229
  )
230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  results["simulation_model"] = train_simulation_model(
232
  force_retrain=not reuse_simulation_artifact,
233
  save_artifact=True,
234
  sample_size=simulation_sample_size,
235
+ tracking_uri=tracking_uri,
236
+ )
237
+ results["registered_model_promotion"] = promote_registered_models(
238
+ tracking_uri=tracking_uri,
239
+ historical_version=historical_version,
240
+ simulation_version=simulation_version,
241
+ allow_latest_version=True,
242
  )
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  if not skip_runtime_validation:
245
  results["runtime_validation"] = validate_runtime()
246
+ else:
247
+ results["runtime_validation"] = {"skipped": True}
248
+
249
+ results["pipeline_run"] = log_pipeline_summary_to_mlflow(
250
+ results,
251
+ tracking_uri=tracking_uri,
252
+ skip_preparation=skip_preparation,
253
+ skip_runtime_validation=skip_runtime_validation,
254
+ reuse_simulation_artifact=reuse_simulation_artifact,
255
+ simulation_sample_size=simulation_sample_size,
256
+ )
257
 
258
  return results
259
 
 
263
  args = parse_args()
264
  summary = run_full_pipeline(
265
  skip_preparation=args.skip_preparation,
 
266
  skip_runtime_validation=args.skip_runtime_validation,
 
267
  reuse_simulation_artifact=args.reuse_simulation_artifact,
268
  simulation_sample_size=args.simulation_sample_size,
269
  notebook_timeout_seconds=args.notebook_timeout_seconds,
270
  kernel_name=args.kernel_name,
271
+ tracking_uri=args.tracking_uri,
272
+ historical_version=args.historical_version,
273
+ simulation_version=args.simulation_version,
274
  )
275
  if args.json:
276
  print(json.dumps(summary, indent=2, ensure_ascii=True))
scripts/runtime_model_specs.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Centralise les contrats des deux modeles runtime du projet."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+
8
+ from scripts.mlflow_config import DEFAULT_MLFLOW_TRACKING_URI, PROJECT_ROOT
9
+
10
+
11
+ DEFAULT_MODELS_DIR = PROJECT_ROOT / "artifacts" / "models"
12
+
13
+
14
@dataclass(frozen=True)
class RuntimeModelSpec:
    """Immutable description of one runtime model expected by the final API."""

    # Role identifier of the model, e.g. "historical" or "simulation".
    role: str
    # Name of the registered model this spec refers to.
    registered_model_name: str
    # Path of the serialized model file.
    output_model_path: Path
    # Path of the JSON metadata file stored next to the model.
    output_metadata_path: Path
22
+
23
+
24
# Contract of the historical runtime model.
HISTORICAL_RUNTIME_MODEL_SPEC = RuntimeModelSpec(
    "historical",
    "p1_historical_pipeline",
    DEFAULT_MODELS_DIR.joinpath("p1_historical_pipeline.joblib"),
    DEFAULT_MODELS_DIR.joinpath("p1_historical_metadata.json"),
)

# Contract of the simulation runtime model.
SIMULATION_RUNTIME_MODEL_SPEC = RuntimeModelSpec(
    "simulation",
    "p23_simulation_pipeline",
    DEFAULT_MODELS_DIR.joinpath("p23_simulation_pipeline.joblib"),
    DEFAULT_MODELS_DIR.joinpath("p23_simulation_metadata.json"),
)

# Lookup table of every runtime spec, keyed by its role.
RUNTIME_MODEL_SPECS = {
    runtime_spec.role: runtime_spec
    for runtime_spec in (HISTORICAL_RUNTIME_MODEL_SPEC, SIMULATION_RUNTIME_MODEL_SPEC)
}
scripts/train_historical_model.py CHANGED
@@ -14,16 +14,17 @@ if str(PROJECT_ROOT) not in sys.path:
14
 
15
  from scripts.experience_1 import run_experience_1
16
  from scripts.pipeline_utils import ensure_paths_exist, relative_to_project
 
17
 
18
 
19
  EXPERIENCE_1_SCRIPT_PATH = Path("scripts/experience_1.py")
20
  HISTORICAL_OUTPUTS = [
21
  Path("artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv"),
22
  Path("artifacts/experiments/experience_1/model_results.csv"),
23
- Path("artifacts/models/p1_historical_pipeline.joblib"),
24
- Path("artifacts/models/p1_historical_metadata.json"),
25
  ]
26
- HISTORICAL_METADATA_PATH = Path("artifacts/models/p1_historical_metadata.json")
27
 
28
 
29
  def parse_args() -> argparse.Namespace:
@@ -68,10 +69,15 @@ def train_historical_model(
68
  )
69
  return {
70
  "script": relative_to_project(EXPERIENCE_1_SCRIPT_PATH),
 
71
  "training_notebook_reference": metadata.get("training_notebook"),
72
  "outputs": [relative_to_project(path) for path in resolved_outputs],
73
  "model_name": metadata.get("model_name"),
74
  "target_year": metadata.get("target_year"),
 
 
 
 
75
  "metrics": metrics,
76
  }
77
 
 
14
 
15
  from scripts.experience_1 import run_experience_1
16
  from scripts.pipeline_utils import ensure_paths_exist, relative_to_project
17
+ from scripts.runtime_model_specs import HISTORICAL_RUNTIME_MODEL_SPEC
18
 
19
 
20
  EXPERIENCE_1_SCRIPT_PATH = Path("scripts/experience_1.py")
21
  HISTORICAL_OUTPUTS = [
22
  Path("artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv"),
23
  Path("artifacts/experiments/experience_1/model_results.csv"),
24
+ HISTORICAL_RUNTIME_MODEL_SPEC.output_model_path.relative_to(PROJECT_ROOT),
25
+ HISTORICAL_RUNTIME_MODEL_SPEC.output_metadata_path.relative_to(PROJECT_ROOT),
26
  ]
27
+ HISTORICAL_METADATA_PATH = HISTORICAL_RUNTIME_MODEL_SPEC.output_metadata_path.relative_to(PROJECT_ROOT)
28
 
29
 
30
  def parse_args() -> argparse.Namespace:
 
69
  )
70
  return {
71
  "script": relative_to_project(EXPERIENCE_1_SCRIPT_PATH),
72
+ "artifact_source": "retrained",
73
  "training_notebook_reference": metadata.get("training_notebook"),
74
  "outputs": [relative_to_project(path) for path in resolved_outputs],
75
  "model_name": metadata.get("model_name"),
76
  "target_year": metadata.get("target_year"),
77
+ "registered_model_name": metadata.get("registered_model_name"),
78
+ "registered_model_version": metadata.get("registered_model_version"),
79
+ "registered_model_run_id": metadata.get("registered_model_run_id"),
80
+ "model_uri": metadata.get("model_uri"),
81
  "metrics": metrics,
82
  }
83
 
scripts/train_simulation_model.py CHANGED
@@ -3,26 +3,41 @@
3
  from __future__ import annotations
4
 
5
  import argparse
 
6
  from pathlib import Path
7
  import sys
8
 
 
 
 
9
 
10
  PROJECT_ROOT = Path(__file__).resolve().parents[1]
11
  if str(PROJECT_ROOT) not in sys.path:
12
  sys.path.insert(0, str(PROJECT_ROOT))
13
 
 
 
 
 
 
 
14
  from scripts.pipeline_utils import ensure_paths_exist, relative_to_project
15
  from scripts.prediction_adjustment import (
16
  SIMULATION_METADATA_PATH,
17
  SIMULATION_MODEL_PATH,
18
  load_or_train_simulation_model,
19
  )
 
 
 
 
20
 
21
 
22
  SIMULATION_OUTPUTS = [
23
  SIMULATION_MODEL_PATH,
24
  SIMULATION_METADATA_PATH,
25
  ]
 
26
 
27
 
28
  def parse_args() -> argparse.Namespace:
@@ -46,14 +61,66 @@ def parse_args() -> argparse.Namespace:
46
  action="store_true",
47
  help="Train in memory without rewriting the model artifacts.",
48
  )
 
 
 
 
 
49
  return parser.parse_args()
50
 
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  def train_simulation_model(
53
  *,
54
  force_retrain: bool = False,
55
  save_artifact: bool = True,
56
  sample_size: int = 200_000,
 
57
  ) -> dict[str, object]:
58
  """Charge ou reentraine le modele local de simulation.
59
 
@@ -61,18 +128,42 @@ def train_simulation_model(
61
  force_retrain: Force le reentrainement meme si les artefacts existent.
62
  save_artifact: Ecrit les artefacts sur disque si `True`.
63
  sample_size: Nombre maximal de lignes echantillonnees pour l'entrainement.
 
64
 
65
  Returns:
66
  dict[str, object]: Resume du dataset utilise, des metriques et des sorties.
67
  """
 
 
 
 
 
 
68
  loaded_model, simulation_df = load_or_train_simulation_model(
69
  force_retrain=force_retrain,
70
  save_artifact=save_artifact,
71
  sample_size=sample_size,
72
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  output_paths: list[str] = []
75
  if save_artifact:
 
 
 
 
76
  resolved_outputs = ensure_paths_exist(SIMULATION_OUTPUTS, label="simulation model outputs")
77
  output_paths = [relative_to_project(path) for path in resolved_outputs]
78
 
@@ -85,6 +176,11 @@ def train_simulation_model(
85
  return {
86
  "dataset_rows": int(len(simulation_df)),
87
  "sample_size": loaded_model.metadata.get("sample_size"),
 
 
 
 
 
88
  "metrics": metrics,
89
  "outputs": output_paths,
90
  }
@@ -97,6 +193,7 @@ def main() -> None:
97
  force_retrain=args.force_retrain,
98
  save_artifact=not args.no_save,
99
  sample_size=args.sample_size,
 
100
  )
101
 
102
 
 
3
  from __future__ import annotations
4
 
5
  import argparse
6
+ import json
7
  from pathlib import Path
8
  import sys
9
 
10
+ import mlflow
11
+ from mlflow.tracking import MlflowClient
12
+
13
 
14
  PROJECT_ROOT = Path(__file__).resolve().parents[1]
15
  if str(PROJECT_ROOT) not in sys.path:
16
  sys.path.insert(0, str(PROJECT_ROOT))
17
 
18
+ from scripts.mlflow_logging import log_and_register_sklearn_model
19
+ from scripts.mlflow_config import (
20
+ SIMULATION_RUNTIME_EXPERIMENT_NAME,
21
+ experiment_artifact_location,
22
+ normalize_tracking_uri,
23
+ )
24
  from scripts.pipeline_utils import ensure_paths_exist, relative_to_project
25
  from scripts.prediction_adjustment import (
26
  SIMULATION_METADATA_PATH,
27
  SIMULATION_MODEL_PATH,
28
  load_or_train_simulation_model,
29
  )
30
+ from scripts.runtime_model_specs import (
31
+ DEFAULT_MLFLOW_TRACKING_URI,
32
+ SIMULATION_RUNTIME_MODEL_SPEC,
33
+ )
34
 
35
 
36
  SIMULATION_OUTPUTS = [
37
  SIMULATION_MODEL_PATH,
38
  SIMULATION_METADATA_PATH,
39
  ]
40
+ SIMULATION_MLFLOW_EXPERIMENT_NAME = SIMULATION_RUNTIME_EXPERIMENT_NAME
41
 
42
 
43
  def parse_args() -> argparse.Namespace:
 
61
  action="store_true",
62
  help="Train in memory without rewriting the model artifacts.",
63
  )
64
+ parser.add_argument(
65
+ "--tracking-uri",
66
+ default=DEFAULT_MLFLOW_TRACKING_URI,
67
+ help="Tracking URI MLflow utilise pour journaliser et enregistrer le modele.",
68
+ )
69
  return parser.parse_args()
70
 
71
 
72
def _ensure_simulation_mlflow_experiment(tracking_uri: str) -> None:
    """Select the MLflow experiment of the simulation stage, creating it if missing.

    Args:
        tracking_uri: MLflow tracking URI; normalized before it is applied.
    """
    resolved_uri = normalize_tracking_uri(tracking_uri)
    mlflow.set_tracking_uri(resolved_uri)

    experiment_name = SIMULATION_MLFLOW_EXPERIMENT_NAME
    tracking_client = MlflowClient(tracking_uri=resolved_uri)
    if tracking_client.get_experiment_by_name(experiment_name) is None:
        artifact_root = experiment_artifact_location(
            experiment_name,
            tracking_uri=resolved_uri,
        )
        tracking_client.create_experiment(experiment_name, artifact_location=artifact_root)
    mlflow.set_experiment(experiment_name)
87
+
88
+
89
def _register_simulation_runtime_model(
    *,
    loaded_model,
    tracking_uri: str,
) -> dict[str, str]:
    """Log the local simulation model in a new MLflow run and register it.

    Args:
        loaded_model: Object exposing a ``metadata`` mapping and a ``pipeline`` attribute.
        tracking_uri: MLflow tracking URI used to select the simulation experiment.

    Returns:
        dict[str, str]: The value returned by ``log_and_register_sklearn_model``.
    """
    _ensure_simulation_mlflow_experiment(tracking_uri)

    metadata = loaded_model.metadata
    spec = SIMULATION_RUNTIME_MODEL_SPEC
    entrypoint = "scripts/train_simulation_model.py"
    metrics = metadata.get("metrics", {})

    with mlflow.start_run(run_name=f"{SIMULATION_MLFLOW_EXPERIMENT_NAME}__runtime_model"):
        run_params = (
            ("runtime_model_role", spec.role),
            ("registered_model_name", spec.registered_model_name),
            ("training_entrypoint", entrypoint),
            ("model_name", metadata.get("model_name")),
            ("dataset_source", metadata.get("dataset_source")),
            ("sample_size", metadata.get("sample_size")),
        )
        for param_name, param_value in run_params:
            mlflow.log_param(param_name, param_value)

        # Metrics stored as None are not logged.
        for metric_name, metric_value in metrics.items():
            if metric_value is None:
                continue
            mlflow.log_metric(metric_name, float(metric_value))

        return log_and_register_sklearn_model(
            loaded_model.pipeline,
            artifact_name=spec.registered_model_name,
            registered_model_name=spec.registered_model_name,
            model_metadata={
                "runtime_model_role": spec.role,
                "training_entrypoint": entrypoint,
            },
        )
116
+
117
+
118
  def train_simulation_model(
119
  *,
120
  force_retrain: bool = False,
121
  save_artifact: bool = True,
122
  sample_size: int = 200_000,
123
+ tracking_uri: str = DEFAULT_MLFLOW_TRACKING_URI,
124
  ) -> dict[str, object]:
125
  """Charge ou reentraine le modele local de simulation.
126
 
 
128
  force_retrain: Force le reentrainement meme si les artefacts existent.
129
  save_artifact: Ecrit les artefacts sur disque si `True`.
130
  sample_size: Nombre maximal de lignes echantillonnees pour l'entrainement.
131
+ tracking_uri: Tracking URI MLflow utilise pour le registry.
132
 
133
  Returns:
134
  dict[str, object]: Resume du dataset utilise, des metriques et des sorties.
135
  """
136
+ tracking_uri = normalize_tracking_uri(tracking_uri)
137
+ reused_existing_artifact = (
138
+ not force_retrain
139
+ and SIMULATION_MODEL_PATH.exists()
140
+ and SIMULATION_METADATA_PATH.exists()
141
+ )
142
  loaded_model, simulation_df = load_or_train_simulation_model(
143
  force_retrain=force_retrain,
144
  save_artifact=save_artifact,
145
  sample_size=sample_size,
146
  )
147
+ registration = _register_simulation_runtime_model(
148
+ loaded_model=loaded_model,
149
+ tracking_uri=tracking_uri,
150
+ )
151
+ loaded_model.metadata.update(
152
+ {
153
+ "runtime_model_role": SIMULATION_RUNTIME_MODEL_SPEC.role,
154
+ "registered_model_name": registration["registered_model_name"],
155
+ "registered_model_version": registration["registered_model_version"],
156
+ "registered_model_run_id": registration["run_id"],
157
+ "model_uri": registration["model_uri"],
158
+ }
159
+ )
160
 
161
  output_paths: list[str] = []
162
  if save_artifact:
163
+ SIMULATION_METADATA_PATH.write_text(
164
+ json.dumps(loaded_model.metadata, indent=2, ensure_ascii=True),
165
+ encoding="utf-8",
166
+ )
167
  resolved_outputs = ensure_paths_exist(SIMULATION_OUTPUTS, label="simulation model outputs")
168
  output_paths = [relative_to_project(path) for path in resolved_outputs]
169
 
 
176
  return {
177
  "dataset_rows": int(len(simulation_df)),
178
  "sample_size": loaded_model.metadata.get("sample_size"),
179
+ "artifact_source": "reused_existing" if reused_existing_artifact else "retrained",
180
+ "registered_model_name": registration["registered_model_name"],
181
+ "registered_model_version": registration["registered_model_version"],
182
+ "registered_model_run_id": registration["run_id"],
183
+ "model_uri": registration["model_uri"],
184
  "metrics": metrics,
185
  "outputs": output_paths,
186
  }
 
193
  force_retrain=args.force_retrain,
194
  save_artifact=not args.no_save,
195
  sample_size=args.sample_size,
196
+ tracking_uri=args.tracking_uri,
197
  )
198
 
199
 
streamlit/requirements.txt CHANGED
@@ -5,5 +5,6 @@ pandas==2.3.3
5
  Pillow==11.3.0
6
  requests==2.32.5
7
  scikit-learn==1.8.0
 
8
  streamlit==1.49.1
9
  uvicorn==0.42.0
 
5
  Pillow==11.3.0
6
  requests==2.32.5
7
  scikit-learn==1.8.0
8
+ shap
9
  streamlit==1.49.1
10
  uvicorn==0.42.0