Spaces:

stephmnt
/

rendement_agricole

Sleeping

App Files Files Community

rendement_agricole / scripts /prediction_adjustment.py

stephmnt

Sync from GitHub via hub-sync

fa3d628 verified 25 days ago

raw

history blame contribute delete

42.2 kB

	"""Assemble la logique metier finale de prediction et de recommandation.

	Le module combine un modele historique `P1` et un modele local `P2/P3` pour
	produire un rendement ajuste, une explication interpretable et un classement de
	cultures candidates.
	"""

	from __future__ import annotations

	from dataclasses import dataclass
	from datetime import datetime, timezone
	import json
	from pathlib import Path
	from typing import Any

	import joblib
	import numpy as np
	import pandas as pd
	from sklearn.compose import ColumnTransformer
	from sklearn.impute import SimpleImputer
	from sklearn.linear_model import LinearRegression
	from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
	from sklearn.model_selection import train_test_split
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import OneHotEncoder

	from scripts.runtime_model_specs import (
	HISTORICAL_RUNTIME_MODEL_SPEC,
	SIMULATION_RUNTIME_MODEL_SPEC,
	)
	from scripts.simulation_dataset import load_normalized_simulation_dataset

# Directory two levels above this file (the parent of this file's directory);
# every relative path handled by this module is anchored here.
PROJECT_ROOT = Path(__file__).resolve().parents[1]

# Inputs of the historical model (P1): the consolidated wide dataset plus the
# serialized pipeline and metadata locations taken from the runtime model spec.
HISTORICAL_WIDE_DATASET_PATH = PROJECT_ROOT / "artifacts/experiments/experience_1/dataset_consolide_historique_colonnes.csv"
HISTORICAL_MODEL_PATH = HISTORICAL_RUNTIME_MODEL_SPEC.output_model_path
HISTORICAL_METADATA_PATH = HISTORICAL_RUNTIME_MODEL_SPEC.output_metadata_path

# Inputs of the local simulation model (P2/P3): the raw dataset plus the
# serialized pipeline and metadata locations taken from the runtime model spec.
SIMULATION_DATASET_PATH = PROJECT_ROOT / "data/simulation/crop_yield.csv"
SIMULATION_MODEL_PATH = SIMULATION_RUNTIME_MODEL_SPEC.output_model_path
SIMULATION_METADATA_PATH = SIMULATION_RUNTIME_MODEL_SPEC.output_metadata_path

# Random seed shared by sampling and train/test splitting in this module.
SEED = 42
# Upper bound on the number of simulation rows sampled for training.
SIMULATION_SAMPLE_SIZE = 200_000
# Default feature columns fed to the local simulation model.
SIMULATION_FEATURE_COLUMNS = [
    "region",
    "soil_type",
    "rainfall_mm",
    "temperature_celsius",
    "fertilizer_used",
    "irrigation_used",
    "weather_condition",
    "days_to_harvest",
]


@dataclass
class LoadedModel:
    """Simple pair holding a loaded pipeline together with its metadata."""

    # Estimator pipeline, either deserialized with joblib or freshly trained.
    pipeline: Pipeline
    # Metadata dictionary describing the pipeline (parsed from JSON when loaded).
    metadata: dict[str, Any]


	def _resolve_path(path: str \| Path) -> Path:
	"""Resout un chemin absolu ou relatif par rapport au depot."""
	raw_path = Path(path)
	if raw_path.is_absolute():
	return raw_path
	return PROJECT_ROOT / raw_path


	def _ensure_parent_dir(path: Path) -> None:
	"""Cree le dossier parent d'un artefact si necessaire."""
	path.parent.mkdir(parents=True, exist_ok=True)


	def _json_ready(value: Any) -> Any:
	"""Convertit les types numpy et pandas en types JSON-compatibles."""
	if isinstance(value, (np.floating, np.integer)):
	return value.item()
	if isinstance(value, np.ndarray):
	return value.tolist()
	if isinstance(value, pd.Timestamp):
	return value.isoformat()
	if isinstance(value, dict):
	return {str(key): _json_ready(item) for key, item in value.items()}
	if isinstance(value, list):
	return [_json_ready(item) for item in value]
	return value


	def _safe_float(value: Any) -> float:
	"""Convertit de maniere defensive un scalaire potentiel en `float`."""
	return float(np.asarray(value).reshape(-1)[0])


	def _value_for_display(value: Any) -> Any:
	"""Normalise une valeur pour l'affichage ou la serialisation."""
	if pd.isna(value):
	return None
	if isinstance(value, (np.floating, np.integer)):
	return value.item()
	return value


	def make_dense_onehot_encoder() -> OneHotEncoder:
	"""Construit un `OneHotEncoder` dense compatible avec plusieurs versions sklearn."""
	try:
	return OneHotEncoder(handle_unknown="ignore", sparse_output=False)
	except TypeError:
	return OneHotEncoder(handle_unknown="ignore", sparse=False)


	def build_preprocessor(feature_frame: pd.DataFrame) -> ColumnTransformer:
	"""Construit le preprocesseur commun aux modeles tabulaires.

	Args:
	feature_frame: Table de caracteristiques de reference.

	Returns:
	ColumnTransformer: Pipeline de pretraitement numerique et categoriel.
	"""
	numeric_features = feature_frame.select_dtypes(include=np.number).columns.tolist()
	categorical_features = [col for col in feature_frame.columns if col not in numeric_features]

	return ColumnTransformer(
	transformers=[
	(
	"num",
	Pipeline(
	steps=[
	("imputer", SimpleImputer(strategy="median")),
	]
	),
	numeric_features,
	),
	(
	"cat",
	Pipeline(
	steps=[
	("imputer", SimpleImputer(strategy="most_frequent")),
	("encoder", make_dense_onehot_encoder()),
	]
	),
	categorical_features,
	),
	]
	)


	def compute_regression_metrics(y_true: pd.Series \| np.ndarray, y_pred: np.ndarray) -> dict[str, float]:
	"""Calcule les metriques de regression standard du projet."""
	y_true_array = np.asarray(y_true, dtype=float)
	y_pred_array = np.asarray(y_pred, dtype=float)
	rmse = float(np.sqrt(mean_squared_error(y_true_array, y_pred_array)))
	mae = float(mean_absolute_error(y_true_array, y_pred_array))
	r2 = float(r2_score(y_true_array, y_pred_array)) if len(y_true_array) >= 2 else np.nan
	return {
	"rmse": rmse,
	"mae": mae,
	"r2": r2,
	}


	def normalize_label(value: Any) -> str:
	"""Normalise une etiquette textuelle issue des datasets ou de l'API."""
	return str(value).strip()


	def load_historical_wide_dataset(dataset_path: str \| Path = HISTORICAL_WIDE_DATASET_PATH) -> pd.DataFrame:
	"""Charge le dataset historique consolide utilise par la brique P1."""
	path = _resolve_path(dataset_path)
	historical_df = pd.read_csv(path)
	historical_df["area"] = historical_df["area"].map(normalize_label)
	historical_df["crop"] = historical_df["crop"].map(normalize_label)
	return historical_df


	def load_historical_model(
	model_path: str \| Path = HISTORICAL_MODEL_PATH,
	metadata_path: str \| Path = HISTORICAL_METADATA_PATH,
	) -> LoadedModel:
	"""Charge le pipeline historique et ses metadonnees."""
	resolved_model_path = _resolve_path(model_path)
	resolved_metadata_path = _resolve_path(metadata_path)

	if not resolved_model_path.exists():
	raise FileNotFoundError(f"Historical model artifact not found: {resolved_model_path}")
	if not resolved_metadata_path.exists():
	raise FileNotFoundError(f"Historical metadata artifact not found: {resolved_metadata_path}")

	pipeline = joblib.load(resolved_model_path)
	metadata = json.loads(resolved_metadata_path.read_text(encoding="utf-8"))
	return LoadedModel(pipeline=pipeline, metadata=metadata)


	def load_and_prepare_simulation_dataset(
	simulation_path: str \| Path = SIMULATION_DATASET_PATH,
	) -> pd.DataFrame:
	"""Charge et normalise le dataset de simulation locale."""
	return load_normalized_simulation_dataset(
	_resolve_path(simulation_path),
	boolean_dtype="bool",
	)


	def _fit_simulation_pipeline(
	simulation_df: pd.DataFrame,
	feature_columns: list[str] \| None = None,
	sample_size: int = SIMULATION_SAMPLE_SIZE,
	) -> dict[str, Any]:
	"""Entraine le modele lineaire local utilise pour P2 et P3."""
	selected_features = feature_columns or SIMULATION_FEATURE_COLUMNS
	sampled_df = simulation_df.sample(n=min(sample_size, len(simulation_df)), random_state=SEED).copy()

	X_all = sampled_df[selected_features].copy()
	y_all = sampled_df["yield_tons_per_hectare"].copy()

	X_train, X_test, y_train, y_test = train_test_split(
	X_all,
	y_all,
	test_size=0.2,
	random_state=SEED,
	)

	pipeline = Pipeline(
	steps=[
	("preprocessor", build_preprocessor(X_train)),
	("regressor", LinearRegression()),
	]
	)
	pipeline.fit(X_train, y_train)

	train_metrics = compute_regression_metrics(y_train, pipeline.predict(X_train))
	test_metrics = compute_regression_metrics(y_test, pipeline.predict(X_test))

	final_pipeline = Pipeline(
	steps=[
	("preprocessor", build_preprocessor(X_all)),
	("regressor", LinearRegression()),
	]
	)
	final_pipeline.fit(X_all, y_all)

	metadata = {
	"model_name": "linear_regression",
	"trained_at_utc": datetime.now(timezone.utc).isoformat(),
	"dataset_source": str(_resolve_path(SIMULATION_DATASET_PATH).relative_to(PROJECT_ROOT)),
	"feature_columns": selected_features,
	"sample_size": int(len(sampled_df)),
	"metrics": {
	"train_rmse": train_metrics["rmse"],
	"train_mae": train_metrics["mae"],
	"train_r2": train_metrics["r2"],
	"test_rmse": test_metrics["rmse"],
	"test_mae": test_metrics["mae"],
	"test_r2": test_metrics["r2"],
	},
	"strategy": "2_models_3_predictions_combined",
	"role": "local_adjustment_model_for_P2_and_P3",
	}

	return {
	"pipeline": final_pipeline,
	"metadata": metadata,
	"sampled_df": sampled_df,
	}


	def load_or_train_simulation_model(
	*,
	force_retrain: bool = False,
	save_artifact: bool = True,
	simulation_path: str \| Path = SIMULATION_DATASET_PATH,
	model_path: str \| Path = SIMULATION_MODEL_PATH,
	metadata_path: str \| Path = SIMULATION_METADATA_PATH,
	sample_size: int = SIMULATION_SAMPLE_SIZE,
	) -> tuple[LoadedModel, pd.DataFrame]:
	"""Charge ou regenere le modele local de simulation.

	Args:
	force_retrain: Force le reentrainement meme si les artefacts existent.
	save_artifact: Ecrit les artefacts sur disque si `True`.
	simulation_path: Source tabulaire du modele local.
	model_path: Chemin cible du pipeline serialize.
	metadata_path: Chemin cible des metadonnees JSON.
	sample_size: Taille maximale de l'echantillon d'entrainement.

	Returns:
	tuple[LoadedModel, pd.DataFrame]: Modele local et dataset normalise.
	"""
	resolved_model_path = _resolve_path(model_path)
	resolved_metadata_path = _resolve_path(metadata_path)
	simulation_df = load_and_prepare_simulation_dataset(simulation_path)

	if not force_retrain and resolved_model_path.exists() and resolved_metadata_path.exists():
	loaded = LoadedModel(
	pipeline=joblib.load(resolved_model_path),
	metadata=json.loads(resolved_metadata_path.read_text(encoding="utf-8")),
	)
	return loaded, simulation_df

	trained = _fit_simulation_pipeline(simulation_df, sample_size=sample_size)
	loaded = LoadedModel(pipeline=trained["pipeline"], metadata=trained["metadata"])

	if save_artifact:
	_ensure_parent_dir(resolved_model_path)
	joblib.dump(loaded.pipeline, resolved_model_path)
	resolved_metadata_path.write_text(
	json.dumps(_json_ready(loaded.metadata), indent=2, ensure_ascii=True),
	encoding="utf-8",
	)

	return loaded, simulation_df


	def infer_target_year_from_metadata_or_dataset(
	historical_metadata: dict[str, Any],
	historical_df: pd.DataFrame,
	) -> int:
	"""Determine l'annee cible du modele historique."""
	target_year = historical_metadata.get("target_year")
	if target_year is not None:
	return int(target_year)

	target_columns = [col for col in historical_df.columns if col.startswith("target_yield_t_ha_")]
	available_years = sorted(
	int(col.rsplit("_", 1)[1])
	for col in target_columns
	if historical_df[col].notna().any()
	)
	if not available_years:
	raise ValueError("No usable historical target year found.")
	return int(max(available_years))


	def latest_available_from_row(row: pd.Series, prefix: str, years: list[int]) -> tuple[float, int \| None]:
	"""Recupere la derniere valeur non nulle disponible pour une serie annuelle."""
	for year in sorted(years, reverse=True):
	value = row.get(f"{prefix}_{year}", np.nan)
	if pd.notna(value):
	return float(value), year
	return np.nan, None


	def build_historical_reference_frame(
	historical_df: pd.DataFrame,
	*,
	target_year: int,
	) -> pd.DataFrame:
	"""Construit les reperes climatiques utilises comme reference locale."""
	feature_years = [year for year in range(target_year) if year >= 0]
	rainfall_years = [
	year for year in feature_years if f"average_rain_fall_mm_per_year_{year}" in historical_df.columns
	]
	temperature_years = [year for year in feature_years if f"avg_temp_{year}" in historical_df.columns]

	reference_df = historical_df[["area", "crop"]].copy()
	reference_df[["reference_rainfall_mm", "reference_rainfall_year"]] = historical_df.apply(
	lambda row: pd.Series(
	latest_available_from_row(row, "average_rain_fall_mm_per_year", rainfall_years)
	),
	axis=1,
	)
	reference_df[["reference_temperature_celsius", "reference_temperature_year"]] = historical_df.apply(
	lambda row: pd.Series(latest_available_from_row(row, "avg_temp", temperature_years)),
	axis=1,
	)

	crop_fallback_df = reference_df.groupby("crop").agg(
	crop_reference_rainfall_mm=("reference_rainfall_mm", "median"),
	crop_reference_temperature_celsius=("reference_temperature_celsius", "median"),
	).reset_index()

	return reference_df.merge(crop_fallback_df, on="crop", how="left")


	def build_simulation_global_reference(simulation_df: pd.DataFrame) -> dict[str, Any]:
	"""Construit un profil global median/modal pour le modele de simulation."""
	return {
	"region": simulation_df["region"].mode().iloc[0],
	"soil_type": simulation_df["soil_type"].mode().iloc[0],
	"rainfall_mm": float(simulation_df["rainfall_mm"].median()),
	"temperature_celsius": float(simulation_df["temperature_celsius"].median()),
	"fertilizer_used": bool(simulation_df["fertilizer_used"].mode().iloc[0]),
	"irrigation_used": bool(simulation_df["irrigation_used"].mode().iloc[0]),
	"weather_condition": simulation_df["weather_condition"].mode().iloc[0],
	"days_to_harvest": float(simulation_df["days_to_harvest"].median()),
	}


	def build_reference_profile_from_row(
	row: pd.Series,
	*,
	simulation_global_reference: dict[str, Any],
	selected_simulation_features: list[str],
	overrides: dict[str, Any] \| None = None,
	) -> tuple[pd.DataFrame, dict[str, str]]:
	"""Construit le profil de reference local pour un couple pays/culture.

	Returns:
	tuple[pd.DataFrame, dict[str, str]]: Profil pret pour l'inference et
	informations de provenance des references pluie/temperature.
	"""
	rainfall_source = (
	"row_latest_history"
	if pd.notna(row["reference_rainfall_mm"])
	else "crop_median"
	if pd.notna(row["crop_reference_rainfall_mm"])
	else "simulation_global_default"
	)
	temperature_source = (
	"row_latest_history"
	if pd.notna(row["reference_temperature_celsius"])
	else "crop_median"
	if pd.notna(row["crop_reference_temperature_celsius"])
	else "simulation_global_default"
	)

	profile = dict(simulation_global_reference)
	profile["rainfall_mm"] = (
	float(row["reference_rainfall_mm"])
	if pd.notna(row["reference_rainfall_mm"])
	else float(row["crop_reference_rainfall_mm"])
	if pd.notna(row["crop_reference_rainfall_mm"])
	else float(simulation_global_reference["rainfall_mm"])
	)
	profile["temperature_celsius"] = (
	float(row["reference_temperature_celsius"])
	if pd.notna(row["reference_temperature_celsius"])
	else float(row["crop_reference_temperature_celsius"])
	if pd.notna(row["crop_reference_temperature_celsius"])
	else float(simulation_global_reference["temperature_celsius"])
	)

	if overrides:
	profile.update(overrides)

	profile_df = pd.DataFrame([profile])[selected_simulation_features]
	return profile_df, {
	"rainfall_reference_source": rainfall_source,
	"temperature_reference_source": temperature_source,
	}


	class AdjustedYieldService:
	"""Service metier principal expose a l'API et a l'interface Streamlit."""

	def __init__(
	self,
	*,
	historical_dataset_path: str \| Path = HISTORICAL_WIDE_DATASET_PATH,
	historical_model_path: str \| Path = HISTORICAL_MODEL_PATH,
	historical_metadata_path: str \| Path = HISTORICAL_METADATA_PATH,
	simulation_dataset_path: str \| Path = SIMULATION_DATASET_PATH,
	simulation_model_path: str \| Path = SIMULATION_MODEL_PATH,
	simulation_metadata_path: str \| Path = SIMULATION_METADATA_PATH,
	force_retrain_simulation: bool = False,
	) -> None:
	"""Initialise les modeles, datasets et catalogues utiles au runtime."""
	self.context = _load_prediction_context(
	historical_dataset_path=historical_dataset_path,
	historical_model_path=historical_model_path,
	historical_metadata_path=historical_metadata_path,
	simulation_dataset_path=simulation_dataset_path,
	simulation_model_path=simulation_model_path,
	simulation_metadata_path=simulation_metadata_path,
	force_retrain_simulation=force_retrain_simulation,
	)

	self.historical_model = self.context["historical_model"]
	self.historical_metadata = self.context["historical_metadata"]
	self.historical_df = self.context["historical_df"]
	self.simulation_model = self.context["simulation_model"]
	self.simulation_metadata = self.context["simulation_metadata"]
	self.simulation_df = self.context["simulation_df"]
	self.simulation_global_reference = self.context["simulation_global_reference"]
	self.strategy_df = self.context["strategy_df"]
	self.target_year = int(self.context["target_year"])
	self.selected_simulation_features = list(self.simulation_metadata["feature_columns"])
	self.available_areas = sorted(self.strategy_df["area"].dropna().unique().tolist())
	self.available_crops = sorted(self.strategy_df["crop"].dropna().unique().tolist())
	self.crops_by_area = {
	area: sorted(area_df["crop"].dropna().unique().tolist())
	for area, area_df in self.strategy_df.groupby("area")
	}
	self.simulation_options = {
	"regions": sorted(self.simulation_df["region"].dropna().unique().tolist()),
	"soil_types": sorted(self.simulation_df["soil_type"].dropna().unique().tolist()),
	"weather_conditions": sorted(self.simulation_df["weather_condition"].dropna().unique().tolist()),
	}
	self._historical_shap_state: dict[str, Any] \| None = None

	def _sanitize_overrides(self, overrides: dict[str, Any] \| None = None) -> dict[str, Any]:
	"""Nettoie les surcharges de conditions avant utilisation."""
	if not overrides:
	return {}

	cleaned: dict[str, Any] = {}
	for key, value in overrides.items():
	if value is None:
	continue
	if key in {"region", "soil_type", "weather_condition"}:
	cleaned[key] = normalize_label(value)
	continue
	if key in {"fertilizer_used", "irrigation_used"}:
	cleaned[key] = bool(value)
	continue
	cleaned[key] = float(value) if key in {"rainfall_mm", "temperature_celsius", "days_to_harvest"} else value
	return cleaned

	def _get_row(self, area: str, crop: str) -> pd.Series:
	"""Recupere la ligne historique unique correspondant au couple demande."""
	return _get_area_crop_row(self.strategy_df, area=area, crop=crop)

	def _predict_p1(self, row: pd.Series) -> float:
	"""Calcule la prediction historique P1 a partir d'une ligne consolidee."""
	return _predict_p1_from_row(row, self.historical_model, self.historical_metadata)

	def _map_transformed_feature_to_raw_feature(
	self,
	transformed_feature_name: str,
	raw_feature_names: list[str],
	) -> str:
	"""Ramene un nom de feature transformee vers sa variable brute d'origine."""
	candidates = [transformed_feature_name]
	if "__" in transformed_feature_name:
	parts = transformed_feature_name.split("__")
	candidates.extend("__".join(parts[index:]) for index in range(1, len(parts)))

	for candidate in candidates:
	for raw_feature in sorted(raw_feature_names, key=len, reverse=True):
	if candidate == raw_feature or candidate.startswith(f"{raw_feature}_"):
	return raw_feature
	return transformed_feature_name

	def _aggregate_transformed_contributions(
	self,
	*,
	transformed_feature_names: list[str],
	contribution_values: np.ndarray,
	raw_feature_names: list[str],
	) -> dict[str, float]:
	"""Agrege les contributions encodees par modalite au niveau variable brute."""
	aggregated: dict[str, float] = {}
	for transformed_feature_name, contribution_value in zip(transformed_feature_names, contribution_values):
	raw_feature_name = self._map_transformed_feature_to_raw_feature(
	transformed_feature_name,
	raw_feature_names,
	)
	aggregated[raw_feature_name] = aggregated.get(raw_feature_name, 0.0) + float(contribution_value)
	return aggregated

	def _ensure_historical_shap_state(self) -> dict[str, Any]:
	"""Initialise a la demande l'etat SHAP du modele historique."""
	if self._historical_shap_state is not None:
	return self._historical_shap_state

	try:
	import shap # type: ignore
	except ModuleNotFoundError:
	self._historical_shap_state = {
	"available": False,
	"status": "missing_dependency",
	"message": "Le package shap n'est pas installe dans l'environnement courant.",
	}
	return self._historical_shap_state

	preprocessor = self.historical_model.named_steps["preprocessor"]
	regressor = self.historical_model.named_steps["regressor"]
	raw_feature_names = list(self.historical_metadata["feature_columns"])
	background_df = self.historical_df[raw_feature_names].sample(
	n=min(200, len(self.historical_df)),
	random_state=SEED,
	)
	background_matrix = preprocessor.transform(background_df)
	transformed_feature_names = list(preprocessor.get_feature_names_out())

	try:
	explainer = shap.Explainer(
	regressor,
	background_matrix,
	feature_names=transformed_feature_names,
	)
	except Exception as exc: # pragma: no cover - defensive fallback
	self._historical_shap_state = {
	"available": False,
	"status": "explainer_initialization_failed",
	"message": f"Impossible d'initialiser SHAP sur le modele historique : {exc}",
	}
	return self._historical_shap_state

	self._historical_shap_state = {
	"available": True,
	"status": "ok",
	"message": None,
	"explainer": explainer,
	"preprocessor": preprocessor,
	"transformed_feature_names": transformed_feature_names,
	"raw_feature_names": raw_feature_names,
	}
	return self._historical_shap_state

	def _explain_historical_prediction(
	self,
	*,
	row: pd.Series,
	p1_prediction: float,
	top_n: int = 10,
	) -> dict[str, Any]:
	"""Produit l'explication SHAP agregee de la prediction P1."""
	shap_state = self._ensure_historical_shap_state()
	if not shap_state["available"]:
	return {
	"available": False,
	"status": shap_state["status"],
	"message": shap_state["message"],
	"model_prediction": p1_prediction,
	"base_value": None,
	"prediction_from_shap": None,
	"top_contributions": [],
	}

	raw_feature_names = shap_state["raw_feature_names"]
	feature_frame = pd.DataFrame([row[raw_feature_names].to_dict()])[raw_feature_names]
	transformed_row = shap_state["preprocessor"].transform(feature_frame)
	shap_values = shap_state["explainer"](transformed_row)
	contribution_vector = np.asarray(shap_values.values)[0]
	aggregated_contributions = self._aggregate_transformed_contributions(
	transformed_feature_names=shap_state["transformed_feature_names"],
	contribution_values=contribution_vector,
	raw_feature_names=raw_feature_names,
	)

	top_contributions = [
	{
	"feature": raw_feature_name,
	"raw_value": _value_for_display(row.get(raw_feature_name)),
	"contribution": float(contribution),
	"abs_contribution": abs(float(contribution)),
	}
	for raw_feature_name, contribution in sorted(
	aggregated_contributions.items(),
	key=lambda item: abs(item[1]),
	reverse=True,
	)[:top_n]
	]

	base_value = _safe_float(shap_values.base_values)
	prediction_from_shap = float(base_value + contribution_vector.sum())

	return {
	"available": True,
	"status": "ok",
	"message": None,
	"model_prediction": p1_prediction,
	"base_value": base_value,
	"prediction_from_shap": prediction_from_shap,
	"top_contributions": top_contributions,
	}

	def _explain_local_adjustment(
	self,
	*,
	reference_profile: pd.DataFrame,
	user_profile: pd.DataFrame,
	p2_prediction: float,
	p3_prediction: float,
	top_n: int = 10,
	) -> dict[str, Any]:
	"""Decompose lineairement l'ajustement local applique entre P2 et P3."""
	preprocessor = self.simulation_model.named_steps["preprocessor"]
	regressor = self.simulation_model.named_steps["regressor"]
	transformed_feature_names = list(preprocessor.get_feature_names_out())
	reference_vector = np.asarray(preprocessor.transform(reference_profile))[0]
	user_vector = np.asarray(preprocessor.transform(user_profile))[0]
	delta_vector = user_vector - reference_vector
	coefficient_vector = np.asarray(regressor.coef_).reshape(-1)
	contribution_vector = delta_vector * coefficient_vector
	aggregated_contributions = self._aggregate_transformed_contributions(
	transformed_feature_names=transformed_feature_names,
	contribution_values=contribution_vector,
	raw_feature_names=self.selected_simulation_features,
	)

	reference_row = reference_profile.iloc[0].to_dict()
	user_row = user_profile.iloc[0].to_dict()
	top_contributions = [
	{
	"feature": raw_feature_name,
	"reference_value": _value_for_display(reference_row.get(raw_feature_name)),
	"user_value": _value_for_display(user_row.get(raw_feature_name)),
	"contribution_delta": float(contribution),
	"abs_contribution_delta": abs(float(contribution)),
	}
	for raw_feature_name, contribution in sorted(
	aggregated_contributions.items(),
	key=lambda item: abs(item[1]),
	reverse=True,
	)[:top_n]
	]

	return {
	"method": "exact_linear_delta_decomposition",
	"reference_prediction": p2_prediction,
	"user_prediction": p3_prediction,
	"total_adjustment": float(p3_prediction - p2_prediction),
	"top_contributions": top_contributions,
	}

	def get_reference_profile(
	self,
	area: str,
	crop: str,
	*,
	reference_overrides: dict[str, Any] \| None = None,
	) -> dict[str, Any]:
	"""Retourne le profil de reference local pour un pays et une culture."""
	row = self._get_row(area, crop)
	normalized_reference_overrides = self._sanitize_overrides(reference_overrides)
	reference_profile, reference_sources = build_reference_profile_from_row(
	row,
	simulation_global_reference=self.simulation_global_reference,
	selected_simulation_features=self.selected_simulation_features,
	overrides=normalized_reference_overrides,
	)
	return {
	"country": normalize_label(area),
	"crop": normalize_label(crop),
	"reference_profile": reference_profile.iloc[0].to_dict(),
	**reference_sources,
	}

	def get_baseline(
	self,
	area: str,
	crop: str,
	*,
	reference_overrides: dict[str, Any] \| None = None,
	) -> dict[str, Any]:
	"""Retourne la prediction historique de base et son profil de reference."""
	row = self._get_row(area, crop)
	reference_payload = self.get_reference_profile(
	area,
	crop,
	reference_overrides=reference_overrides,
	)
	p1 = self._predict_p1(row)
	return {
	"country": normalize_label(area),
	"crop": normalize_label(crop),
	"target_year": self.target_year,
	"p1_historical_prediction": p1,
	"reference_profile": reference_payload["reference_profile"],
	"rainfall_reference_source": reference_payload["rainfall_reference_source"],
	"temperature_reference_source": reference_payload["temperature_reference_source"],
	}

	def predict_adjusted_yield(
	self,
	area: str,
	crop: str,
	user_conditions: dict[str, Any],
	*,
	reference_overrides: dict[str, Any] \| None = None,
	) -> dict[str, Any]:
	"""Calcule le rendement final ajuste pour une culture donnee.

	Args:
	area: Pays ou zone retenue.
	crop: Culture cible.
	user_conditions: Conditions locales saisies par l'utilisateur.
	reference_overrides: Surcharges appliquees au profil de reference.

	Returns:
	dict[str, Any]: Detail complet des composantes P1, P2, P3 et des
	explications associees.
	"""
	row = self._get_row(area, crop)
	normalized_reference_overrides = self._sanitize_overrides(reference_overrides)
	normalized_user_conditions = self._sanitize_overrides(user_conditions)
	merged_user_overrides = {
	**normalized_reference_overrides,
	**normalized_user_conditions,
	}

	reference_profile, reference_sources = build_reference_profile_from_row(
	row,
	simulation_global_reference=self.simulation_global_reference,
	selected_simulation_features=self.selected_simulation_features,
	overrides=normalized_reference_overrides,
	)
	user_profile, _ = build_reference_profile_from_row(
	row,
	simulation_global_reference=self.simulation_global_reference,
	selected_simulation_features=self.selected_simulation_features,
	overrides=merged_user_overrides,
	)

	p1 = self._predict_p1(row)
	p2 = float(self.simulation_model.predict(reference_profile)[0])
	p3 = float(self.simulation_model.predict(user_profile)[0])
	local_adjustment = float(p3 - p2)
	final_prediction = float(max(p1 + local_adjustment, 0.0))
	gap_vs_historical_pct = float(local_adjustment / p1 * 100.0) if p1 != 0 else 0.0
	explanation = {
	"historical_shap": self._explain_historical_prediction(
	row=row,
	p1_prediction=p1,
	),
	"local_adjustment": self._explain_local_adjustment(
	reference_profile=reference_profile,
	user_profile=user_profile,
	p2_prediction=p2,
	p3_prediction=p3,
	),
	}

	return {
	"country": normalize_label(area),
	"crop": normalize_label(crop),
	"p1_historical_prediction": p1,
	"p2_reference_simulation": p2,
	"p3_user_simulation": p3,
	"local_adjustment": local_adjustment,
	"gap_vs_historical_pct": gap_vs_historical_pct,
	"final_prediction": final_prediction,
	"reference_profile": reference_profile.iloc[0].to_dict(),
	"user_profile": user_profile.iloc[0].to_dict(),
	"explanation": explanation,
	**reference_sources,
	}

	def recommend_crops(
	self,
	area: str,
	user_conditions: dict[str, Any],
	candidate_crops: list[str] \| None = None,
	*,
	reference_overrides: dict[str, Any] \| None = None,
	) -> pd.DataFrame:
	"""Classe les cultures candidates pour un pays et des conditions locales."""
	normalized_area = normalize_label(area)
	area_rows = self.strategy_df.loc[self.strategy_df["area"] == normalized_area].copy()
	if area_rows.empty:
	raise ValueError(f"No historical rows found for area={normalized_area!r}.")

	if candidate_crops:
	normalized_candidates = {normalize_label(crop) for crop in candidate_crops if normalize_label(crop)}
	area_rows = area_rows.loc[area_rows["crop"].isin(normalized_candidates)].copy()
	if area_rows.empty:
	raise ValueError(f"No matching crop found for area={normalized_area!r} and provided candidates.")

	normalized_reference_overrides = self._sanitize_overrides(reference_overrides)
	normalized_user_conditions = self._sanitize_overrides(user_conditions)
	merged_user_overrides = {
	**normalized_reference_overrides,
	**normalized_user_conditions,
	}
	recommendation_rows = []
	for _, row in area_rows.sort_values("crop").iterrows():
	reference_profile, reference_sources = build_reference_profile_from_row(
	row,
	simulation_global_reference=self.simulation_global_reference,
	selected_simulation_features=self.selected_simulation_features,
	overrides=normalized_reference_overrides,
	)
	user_profile, _ = build_reference_profile_from_row(
	row,
	simulation_global_reference=self.simulation_global_reference,
	selected_simulation_features=self.selected_simulation_features,
	overrides=merged_user_overrides,
	)

	p1 = self._predict_p1(row)
	p2 = float(self.simulation_model.predict(reference_profile)[0])
	p3 = float(self.simulation_model.predict(user_profile)[0])
	local_adjustment = float(p3 - p2)
	final_prediction = float(max(p1 + local_adjustment, 0.0))
	gap_vs_historical_pct = float(local_adjustment / p1 * 100.0) if p1 != 0 else 0.0

	recommendation_rows.append(
	{
	"country": normalized_area,
	"crop": row["crop"],
	"p1_historical_prediction": p1,
	"p2_reference_simulation": p2,
	"p3_user_simulation": p3,
	"local_adjustment": local_adjustment,
	"gap_vs_historical_pct": gap_vs_historical_pct,
	"final_prediction": final_prediction,
	"rainfall_reference_source": reference_sources["rainfall_reference_source"],
	"temperature_reference_source": reference_sources["temperature_reference_source"],
	}
	)

	recommendation_df = (
	pd.DataFrame(recommendation_rows)
	.sort_values(["final_prediction", "p1_historical_prediction"], ascending=[False, False])
	.reset_index(drop=True)
	)
	recommendation_df["recommendation_rank"] = np.arange(1, len(recommendation_df) + 1)

	ordered_columns = [
	"country",
	"crop",
	"p1_historical_prediction",
	"p2_reference_simulation",
	"p3_user_simulation",
	"local_adjustment",
	"gap_vs_historical_pct",
	"final_prediction",
	"recommendation_rank",
	"rainfall_reference_source",
	"temperature_reference_source",
	]
	return recommendation_df[ordered_columns]


	def _load_prediction_context(
	    *,
	    historical_dataset_path: str | Path = HISTORICAL_WIDE_DATASET_PATH,
	    historical_model_path: str | Path = HISTORICAL_MODEL_PATH,
	    historical_metadata_path: str | Path = HISTORICAL_METADATA_PATH,
	    simulation_dataset_path: str | Path = SIMULATION_DATASET_PATH,
	    simulation_model_path: str | Path = SIMULATION_MODEL_PATH,
	    simulation_metadata_path: str | Path = SIMULATION_METADATA_PATH,
	    force_retrain_simulation: bool = False,
	) -> dict[str, Any]:
	    """Load every building block needed by the final prediction runtime.

	    The historical model and its metadata come from ``load_historical_model``,
	    the wide historical dataset from ``load_historical_wide_dataset`` and the
	    simulation model plus its dataset from ``load_or_train_simulation_model``.
	    A target year is then inferred, a historical reference frame is built for
	    that year and left-merged onto the historical dataset on
	    ``("area", "crop")``.

	    Args:
	        historical_dataset_path: Path handed to ``load_historical_wide_dataset``.
	        historical_model_path: Model path handed to ``load_historical_model``.
	        historical_metadata_path: Metadata path handed to
	            ``load_historical_model``.
	        simulation_dataset_path: Passed as ``simulation_path`` to
	            ``load_or_train_simulation_model``.
	        simulation_model_path: Passed as ``model_path`` to
	            ``load_or_train_simulation_model``.
	        simulation_metadata_path: Passed as ``metadata_path`` to
	            ``load_or_train_simulation_model``.
	        force_retrain_simulation: Passed as ``force_retrain`` to
	            ``load_or_train_simulation_model``.

	    Returns:
	        A dict with the keys ``historical_model``, ``historical_metadata``,
	        ``historical_df``, ``simulation_model``, ``simulation_metadata``,
	        ``simulation_df``, ``simulation_global_reference``, ``strategy_df``
	        and ``target_year``.
	    """
	    historical_loaded = load_historical_model(
	        model_path=historical_model_path,
	        metadata_path=historical_metadata_path,
	    )
	    historical_df = load_historical_wide_dataset(historical_dataset_path)
	    simulation_loaded, simulation_df = load_or_train_simulation_model(
	        force_retrain=force_retrain_simulation,
	        simulation_path=simulation_dataset_path,
	        model_path=simulation_model_path,
	        metadata_path=simulation_metadata_path,
	    )

	    target_year = infer_target_year_from_metadata_or_dataset(historical_loaded.metadata, historical_df)
	    reference_df = build_historical_reference_frame(historical_df, target_year=target_year)
	    # Left join: every historical row is kept even when no reference row
	    # exists for its (area, crop) pair.
	    strategy_df = historical_df.merge(reference_df, on=["area", "crop"], how="left")

	    return {
	        "historical_model": historical_loaded.pipeline,
	        "historical_metadata": historical_loaded.metadata,
	        "historical_df": historical_df,
	        "simulation_model": simulation_loaded.pipeline,
	        "simulation_metadata": simulation_loaded.metadata,
	        "simulation_df": simulation_df,
	        "simulation_global_reference": build_simulation_global_reference(simulation_df),
	        "strategy_df": strategy_df,
	        "target_year": target_year,
	    }


	def _predict_p1_from_row(
	    row: pd.Series,
	    historical_model: Pipeline,
	    historical_metadata: dict[str, Any],
	) -> float:
	    """Run the historical model P1 on one consolidated historical row.

	    The feature names are read from ``historical_metadata["feature_columns"]``.
	    The matching values of ``row`` are packed into a one-row DataFrame whose
	    columns follow that same order, and the first value predicted for that
	    frame is returned as a float.
	    """
	    columns = historical_metadata["feature_columns"]
	    record = row[columns].to_dict()
	    # Re-select the columns so the frame's column order matches the metadata.
	    single_row_frame = pd.DataFrame([record])[columns]
	    predictions = historical_model.predict(single_row_frame)
	    return float(predictions[0])


	def _get_area_crop_row(strategy_df: pd.DataFrame, area: str, crop: str) -> pd.Series:
	    """Return the single row matching a country/crop pair.

	    Both labels go through ``normalize_label`` before being compared with the
	    ``area`` and ``crop`` columns of ``strategy_df``.

	    Raises:
	        ValueError: If no row matches, or if more than one row matches.
	    """
	    area_key = normalize_label(area)
	    crop_key = normalize_label(crop)
	    is_match = (strategy_df["area"] == area_key) & (strategy_df["crop"] == crop_key)
	    matches = strategy_df.loc[is_match].copy()

	    match_count = len(matches)
	    if match_count == 1:
	        return matches.iloc[0]
	    if match_count == 0:
	        raise ValueError(f"No historical row found for area={area_key!r}, crop={crop_key!r}.")
	    raise ValueError(f"Multiple historical rows found for area={area_key!r}, crop={crop_key!r}.")


	def predict_adjusted_yield(
	    *,
	    area: str,
	    crop: str,
	    user_conditions: dict[str, Any],
	    historical_dataset_path: str | Path = HISTORICAL_WIDE_DATASET_PATH,
	    historical_model_path: str | Path = HISTORICAL_MODEL_PATH,
	    historical_metadata_path: str | Path = HISTORICAL_METADATA_PATH,
	    simulation_dataset_path: str | Path = SIMULATION_DATASET_PATH,
	    simulation_model_path: str | Path = SIMULATION_MODEL_PATH,
	    simulation_metadata_path: str | Path = SIMULATION_METADATA_PATH,
	    force_retrain_simulation: bool = False,
	) -> dict[str, Any]:
	    """Procedural helper computing an adjusted yield without handling the service.

	    Builds an ``AdjustedYieldService`` from the given paths and delegates to
	    its ``predict_adjusted_yield`` method.

	    Args:
	        area: Area label forwarded to the service method.
	        crop: Crop label forwarded to the service method.
	        user_conditions: User-supplied conditions forwarded to the service
	            method.
	        historical_dataset_path: Forwarded to ``AdjustedYieldService``.
	        historical_model_path: Forwarded to ``AdjustedYieldService``.
	        historical_metadata_path: Forwarded to ``AdjustedYieldService``.
	        simulation_dataset_path: Forwarded to ``AdjustedYieldService``.
	        simulation_model_path: Forwarded to ``AdjustedYieldService``.
	        simulation_metadata_path: Forwarded to ``AdjustedYieldService``.
	        force_retrain_simulation: Forwarded to ``AdjustedYieldService``.

	    Returns:
	        Whatever ``AdjustedYieldService.predict_adjusted_yield`` returns.
	    """
	    # A fresh service is built on every call.
	    # NOTE(review): presumably this reloads models and datasets each time;
	    # confirm against the service initializer before calling in a loop.
	    service = AdjustedYieldService(
	        historical_dataset_path=historical_dataset_path,
	        historical_model_path=historical_model_path,
	        historical_metadata_path=historical_metadata_path,
	        simulation_dataset_path=simulation_dataset_path,
	        simulation_model_path=simulation_model_path,
	        simulation_metadata_path=simulation_metadata_path,
	        force_retrain_simulation=force_retrain_simulation,
	    )
	    return service.predict_adjusted_yield(area=area, crop=crop, user_conditions=user_conditions)


	def recommend_crops(
	    *,
	    area: str,
	    user_conditions: dict[str, Any],
	    historical_dataset_path: str | Path = HISTORICAL_WIDE_DATASET_PATH,
	    historical_model_path: str | Path = HISTORICAL_MODEL_PATH,
	    historical_metadata_path: str | Path = HISTORICAL_METADATA_PATH,
	    simulation_dataset_path: str | Path = SIMULATION_DATASET_PATH,
	    simulation_model_path: str | Path = SIMULATION_MODEL_PATH,
	    simulation_metadata_path: str | Path = SIMULATION_METADATA_PATH,
	    force_retrain_simulation: bool = False,
	) -> pd.DataFrame:
	    """Procedural helper ranking the candidate crops.

	    Builds an ``AdjustedYieldService`` from the given paths and delegates to
	    its ``recommend_crops`` method.

	    Args:
	        area: Area label forwarded to the service method.
	        user_conditions: User-supplied conditions forwarded to the service
	            method.
	        historical_dataset_path: Forwarded to ``AdjustedYieldService``.
	        historical_model_path: Forwarded to ``AdjustedYieldService``.
	        historical_metadata_path: Forwarded to ``AdjustedYieldService``.
	        simulation_dataset_path: Forwarded to ``AdjustedYieldService``.
	        simulation_model_path: Forwarded to ``AdjustedYieldService``.
	        simulation_metadata_path: Forwarded to ``AdjustedYieldService``.
	        force_retrain_simulation: Forwarded to ``AdjustedYieldService``.

	    Returns:
	        The DataFrame returned by ``AdjustedYieldService.recommend_crops``.
	    """
	    # A fresh service is built on every call.
	    # NOTE(review): presumably this reloads models and datasets each time;
	    # confirm against the service initializer before calling in a loop.
	    service = AdjustedYieldService(
	        historical_dataset_path=historical_dataset_path,
	        historical_model_path=historical_model_path,
	        historical_metadata_path=historical_metadata_path,
	        simulation_dataset_path=simulation_dataset_path,
	        simulation_model_path=simulation_model_path,
	        simulation_metadata_path=simulation_metadata_path,
	        force_retrain_simulation=force_retrain_simulation,
	    )
	    return service.recommend_crops(area=area, user_conditions=user_conditions)