Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import pandas as pd | |
def _safe_div(a, b):
    """Divide ``a`` by ``b`` element-wise, returning 0.0 where no quotient exists.

    Both operands go through ``pd.to_numeric(..., errors="coerce")``, so
    entries that cannot be parsed as numbers become NaN. Zeros in the
    denominator are replaced by NaN before dividing, and every NaN left in
    the quotient is filled with 0.0.

    ``b`` must coerce to an object exposing ``.replace`` and the quotient
    must expose ``.fillna``; the caller in this file passes DataFrame columns.
    """
    numerator = pd.to_numeric(a, errors="coerce")
    denominator = pd.to_numeric(b, errors="coerce")
    # Swap zeros for NaN so the division yields NaN instead of +/-inf.
    denominator = denominator.replace(0, np.nan)
    quotient = numerator / denominator
    return quotient.fillna(0.0)
def compute_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with derived feature columns appended.

    The input frame is not modified; all work happens on ``df.copy()``.
    Every source column referenced below is indexed directly, so each one
    must be present in ``df``.

    Columns added, in this order:

    * ``sat_mean`` / ``sat_std``: row-wise mean and standard deviation
      (``ddof=0``) of the four ``satisfaction_employee_*`` columns, cast
      to float.
    * ``delta_eval``: ``note_evaluation_actuelle`` minus
      ``note_evaluation_precedente``.
    * ``ratio_post_stab``: ``annes_sous_responsable_actuel`` divided by
      ``annees_dans_le_poste_actuel`` through ``_safe_div``.
    * ``revenu_par_niveau``: ``revenu_mensuel`` divided by
      ``niveau_hierarchique_poste`` through ``_safe_div``.
    * ``tranche_age``, ``tranche_distance``, ``tranche_revenu``,
      ``tranche_sat_mean``: labelled ``pd.cut`` bins of ``age``,
      ``distance_domicile_travail``, ``revenu_mensuel`` and ``sat_mean``.
    """
    satisfaction_cols = [
        "satisfaction_employee_environnement",
        "satisfaction_employee_nature_travail",
        "satisfaction_employee_equipe",
        "satisfaction_employee_equilibre_pro_perso",
    ]
    out = df.copy()

    # Cast the satisfaction scores once and reuse them for both statistics.
    satisfaction = out[satisfaction_cols].astype(float)
    out["sat_mean"] = satisfaction.mean(axis=1)
    out["sat_std"] = satisfaction.std(axis=1, ddof=0)

    current_score = out["note_evaluation_actuelle"].astype(float)
    previous_score = out["note_evaluation_precedente"].astype(float)
    out["delta_eval"] = current_score - previous_score

    # NOTE(review): the column is spelled "annes_..." (single "e") here;
    # kept as-is -- confirm against the dataset schema.
    out["ratio_post_stab"] = _safe_div(
        out["annes_sous_responsable_actuel"],
        out["annees_dans_le_poste_actuel"],
    )
    out["revenu_par_niveau"] = _safe_div(
        out["revenu_mensuel"],
        out["niveau_hierarchique_poste"],
    )

    # (target column, source column, bin edges, labels). pd.cut uses
    # right-closed intervals by default, so a value equal to an edge falls
    # into the lower bin (e.g. age 25 -> "<=25").
    raw_binnings = (
        (
            "tranche_age",
            "age",
            [-np.inf, 25, 35, 45, 60, np.inf],
            ["<=25", "26-35", "36-45", "46-60", "60+"],
        ),
        (
            "tranche_distance",
            "distance_domicile_travail",
            [-np.inf, 5, 10, 20, np.inf],
            ["<=5", "6-10", "11-20", ">20"],
        ),
        (
            "tranche_revenu",
            "revenu_mensuel",
            [-np.inf, 2500, 4000, 6000, np.inf],
            ["<=2.5k", "2.5-4k", "4-6k", ">6k"],
        ),
    )
    for target, source, edges, labels in raw_binnings:
        out[target] = pd.cut(out[source].astype(float), edges, labels=labels)

    # sat_mean was computed above from float data, so it is binned as-is.
    out["tranche_sat_mean"] = pd.cut(
        out["sat_mean"],
        [-np.inf, 2.0, 3.0, 4.0, np.inf],
        labels=["basse", "moyenne", "bonne", "excellente"],
    )
    return out