Spaces:
Sleeping
Sleeping
File size: 1,658 Bytes
import numpy as np
import pandas as pd
def _safe_div(a, b):
    """Divide ``a`` by ``b`` element-wise, returning 0.0 where the result is undefined.

    Both operands are passed through ``pd.to_numeric(..., errors="coerce")``,
    so entries that cannot be parsed as numbers become NaN. Zeros in the
    denominator are replaced by NaN before dividing, and every NaN left in
    the quotient is filled with 0.0.

    ``b`` must still expose ``.replace`` after coercion, and the quotient
    must expose ``.fillna``; the calls in this file pass pandas Series.
    """
    numerator = pd.to_numeric(a, errors="coerce")
    denominator = pd.to_numeric(b, errors="coerce")
    # Mask zero denominators so the division yields NaN rather than +/-inf.
    denominator = denominator.replace(0, np.nan)
    quotient = numerator / denominator
    return quotient.fillna(0.0)
def compute_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` extended with derived feature columns.

    The input frame is not modified; all work happens on ``df.copy()``.

    Columns added, in this order:

    - ``sat_mean`` / ``sat_std``: row-wise mean and population standard
      deviation (``ddof=0``) of the four ``satisfaction_employee_*`` columns.
    - ``delta_eval``: ``note_evaluation_actuelle`` minus
      ``note_evaluation_precedente``.
    - ``ratio_post_stab``: ``annes_sous_responsable_actuel`` divided by
      ``annees_dans_le_poste_actuel`` via ``_safe_div``.
    - ``revenu_par_niveau``: ``revenu_mensuel`` divided by
      ``niveau_hierarchique_poste`` via ``_safe_div``.
    - ``tranche_age``, ``tranche_distance``, ``tranche_revenu``,
      ``tranche_sat_mean``: labelled ``pd.cut`` bins of ``age``,
      ``distance_domicile_travail``, ``revenu_mensuel`` and ``sat_mean``.

    Raises:
        KeyError: if a required input column is absent from ``df``.
        ValueError: if an ``astype(float)`` conversion meets a value that
            cannot be converted.
    """
    satisfaction_cols = [
        "satisfaction_employee_environnement",
        "satisfaction_employee_nature_travail",
        "satisfaction_employee_equipe",
        "satisfaction_employee_equilibre_pro_perso",
    ]
    out = df.copy()

    # Cast the satisfaction block once and derive both statistics from it.
    satisfaction = out[satisfaction_cols].astype(float)
    out["sat_mean"] = satisfaction.mean(axis=1)
    out["sat_std"] = satisfaction.std(axis=1, ddof=0)

    current_score = out["note_evaluation_actuelle"].astype(float)
    previous_score = out["note_evaluation_precedente"].astype(float)
    out["delta_eval"] = current_score - previous_score

    out["ratio_post_stab"] = _safe_div(
        out["annes_sous_responsable_actuel"],
        out["annees_dans_le_poste_actuel"],
    )
    out["revenu_par_niveau"] = _safe_div(
        out["revenu_mensuel"],
        out["niveau_hierarchique_poste"],
    )

    # (new column, source column, bin edges, labels). The edges are open-ended
    # on both sides, and pd.cut's default intervals are closed on the right.
    raw_column_bins = (
        (
            "tranche_age",
            "age",
            [-np.inf, 25, 35, 45, 60, np.inf],
            ["<=25", "26-35", "36-45", "46-60", "60+"],
        ),
        (
            "tranche_distance",
            "distance_domicile_travail",
            [-np.inf, 5, 10, 20, np.inf],
            ["<=5", "6-10", "11-20", ">20"],
        ),
        (
            "tranche_revenu",
            "revenu_mensuel",
            [-np.inf, 2500, 4000, 6000, np.inf],
            ["<=2.5k", "2.5-4k", "4-6k", ">6k"],
        ),
    )
    for target, source, edges, labels in raw_column_bins:
        out[target] = pd.cut(out[source].astype(float), edges, labels=labels)

    # sat_mean was computed above from float data, so it is binned as-is.
    out["tranche_sat_mean"] = pd.cut(
        out["sat_mean"],
        [-np.inf, 2.0, 3.0, 4.0, np.inf],
        labels=["basse", "moyenne", "bonne", "excellente"],
    )
    return out
|