File size: 1,658 Bytes
3b97d72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import numpy as np
import pandas as pd

def _safe_div(a, b):
    a = pd.to_numeric(a, errors="coerce")
    b = pd.to_numeric(b, errors="coerce").replace(0, np.nan)
    return (a / b).fillna(0.0)

def compute_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* augmented with derived feature columns.

    The input frame is never modified. Columns added to the copy:

    - ``sat_mean`` / ``sat_std``: row-wise mean and population standard
      deviation (``ddof=0``) of the four ``satisfaction_employee_*`` columns.
    - ``delta_eval``: ``note_evaluation_actuelle`` minus
      ``note_evaluation_precedente``.
    - ``ratio_post_stab``: ``annes_sous_responsable_actuel`` divided by
      ``annees_dans_le_poste_actuel`` (via ``_safe_div``).
    - ``revenu_par_niveau``: ``revenu_mensuel`` divided by
      ``niveau_hierarchique_poste`` (via ``_safe_div``).
    - ``tranche_age``, ``tranche_distance``, ``tranche_revenu``,
      ``tranche_sat_mean``: categorical bins produced by ``pd.cut``.

    Args:
        df: Source data containing every column listed above as an input.

    Returns:
        A new DataFrame with the original columns plus the derived ones.

    Raises:
        KeyError: If one or more required input columns are absent; the
            message lists all of them.
    """
    SAT_COLS = [
        "satisfaction_employee_environnement",
        "satisfaction_employee_nature_travail",
        "satisfaction_employee_equipe",
        "satisfaction_employee_equilibre_pro_perso",
    ]
    OTHER_COLS = [
        "note_evaluation_actuelle",
        "note_evaluation_precedente",
        "annes_sous_responsable_actuel",
        "annees_dans_le_poste_actuel",
        "revenu_mensuel",
        "niveau_hierarchique_poste",
        "age",
        "distance_domicile_travail",
    ]

    # Fail fast, before copying, and report every missing column at once
    # rather than only the first lookup that happens to fail.
    missing = [col for col in SAT_COLS + OTHER_COLS if col not in df.columns]
    if missing:
        raise KeyError(f"compute_features: missing required column(s): {missing}")

    X = df.copy()

    # Cast the satisfaction block once and reuse it for both statistics.
    sat = X[SAT_COLS].astype(float)
    X["sat_mean"] = sat.mean(axis=1)
    X["sat_std"] = sat.std(axis=1, ddof=0)
    X["delta_eval"] = (
        X["note_evaluation_actuelle"].astype(float)
        - X["note_evaluation_precedente"].astype(float)
    )

    X["ratio_post_stab"] = _safe_div(
        X["annes_sous_responsable_actuel"], X["annees_dans_le_poste_actuel"]
    )
    X["revenu_par_niveau"] = _safe_div(
        X["revenu_mensuel"], X["niveau_hierarchique_poste"]
    )

    # pd.cut uses right-closed intervals by default, so an edge value such as
    # age 25 falls in the "<=25" bucket.
    age_bins = [-np.inf, 25, 35, 45, 60, np.inf]
    dist_bins = [-np.inf, 5, 10, 20, np.inf]
    revenu_bins = [-np.inf, 2500, 4000, 6000, np.inf]
    sat_mean_bins = [-np.inf, 2.0, 3.0, 4.0, np.inf]

    X["tranche_age"] = pd.cut(
        X["age"].astype(float),
        age_bins,
        labels=["<=25", "26-35", "36-45", "46-60", "60+"],
    )
    X["tranche_distance"] = pd.cut(
        X["distance_domicile_travail"].astype(float),
        dist_bins,
        labels=["<=5", "6-10", "11-20", ">20"],
    )
    X["tranche_revenu"] = pd.cut(
        X["revenu_mensuel"].astype(float),
        revenu_bins,
        labels=["<=2.5k", "2.5-4k", "4-6k", ">6k"],
    )
    X["tranche_sat_mean"] = pd.cut(
        X["sat_mean"],
        sat_mean_bins,
        labels=["basse", "moyenne", "bonne", "excellente"],
    )

    return X