{ "cells": [ { "cell_type": "markdown", "id": "8cea6264-0c8f-4a78-9dbd-bd773264ff39", "metadata": {}, "source": [ "# Project 4 - Identifiez les causes d'attrition au sein d'une ESN" ] }, { "cell_type": "markdown", "id": "8b29558f-1ab6-4788-b670-3b8614b8ddfe", "metadata": {}, "source": [ "### Préparation des données pour la modélisation" ] }, { "cell_type": "markdown", "id": "c816566c-bd23-4e6b-b993-b4d703b9839f", "metadata": {}, "source": [ "**Objectif** : préparer les données pour la modélisation.\n", "\n", "**Contenu :**\n", "\n", "- Nettoyage final\n", "\n", "- Encodage des variables catégorielles\n", "\n", "- Normalisation ou standardisation\n", "\n", "- Sauvegarde des données prêtes dans /data/" ] }, { "cell_type": "code", "execution_count": 3, "id": "0957099e-ca94-4ec4-b207-9b92d1e280d6", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "#import matplotlib.pyplot as plt\n", "#import seaborn as sns\n", "\n", "# Prétraitement et modélisation\n", "from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "0fd20980-230b-40c5-9106-29f2bdcda492", "metadata": {}, "outputs": [], "source": [ "df_central = pd.read_csv(\"../data/processed/df_central.csv\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "48fa578e-3199-4a13-b1ca-c087c9954d15", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | id_employee | \n", "age | \n", "genre | \n", "revenu_mensuel | \n", "statut_marital | \n", "departement | \n", "poste | \n", "nombre_experiences_precedentes | \n", "nombre_heures_travailless | \n", "annee_experience_totale | \n", "... | \n", "nombre_participation_pee | \n", "nb_formations_suivies | \n", "nombre_employee_sous_responsabilite | \n", "distance_domicile_travail | \n", "niveau_education | \n", "domaine_etude | \n", "ayant_enfants | \n", "frequence_deplacement | \n", "annees_depuis_la_derniere_promotion | \n", "annes_sous_responsable_actuel | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "1 | \n", "41 | \n", "F | \n", "5993 | \n", "Célibataire | \n", "Commercial | \n", "Cadre Commercial | \n", "8 | \n", "80 | \n", "8 | \n", "... | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "2 | \n", "Infra & Cloud | \n", "Y | \n", "Occasionnel | \n", "0 | \n", "5 | \n", "
| 1 | \n", "2 | \n", "49 | \n", "M | \n", "5130 | \n", "Marié(e) | \n", "Consulting | \n", "Assistant de Direction | \n", "1 | \n", "80 | \n", "10 | \n", "... | \n", "1 | \n", "3 | \n", "1 | \n", "8 | \n", "1 | \n", "Infra & Cloud | \n", "Y | \n", "Frequent | \n", "1 | \n", "7 | \n", "
| 2 | \n", "4 | \n", "37 | \n", "M | \n", "2090 | \n", "Célibataire | \n", "Consulting | \n", "Consultant | \n", "6 | \n", "80 | \n", "7 | \n", "... | \n", "0 | \n", "3 | \n", "1 | \n", "2 | \n", "2 | \n", "Autre | \n", "Y | \n", "Occasionnel | \n", "0 | \n", "0 | \n", "
| 3 | \n", "5 | \n", "33 | \n", "F | \n", "2909 | \n", "Marié(e) | \n", "Consulting | \n", "Assistant de Direction | \n", "1 | \n", "80 | \n", "8 | \n", "... | \n", "0 | \n", "3 | \n", "1 | \n", "3 | \n", "4 | \n", "Infra & Cloud | \n", "Y | \n", "Frequent | \n", "3 | \n", "0 | \n", "
| 4 | \n", "7 | \n", "27 | \n", "M | \n", "3468 | \n", "Marié(e) | \n", "Consulting | \n", "Consultant | \n", "9 | \n", "80 | \n", "6 | \n", "... | \n", "1 | \n", "3 | \n", "1 | \n", "2 | \n", "1 | \n", "Transformation Digitale | \n", "Y | \n", "Occasionnel | \n", "2 | \n", "2 | \n", "
5 rows × 32 columns
\n", "| \n", " | niveau_hierarchique_poste | \n", "revenu_mensuel | \n", "
|---|---|---|
| niveau_hierarchique_poste | \n", "1.000000 | \n", "0.920429 | \n", "
| revenu_mensuel | \n", "0.920429 | \n", "1.000000 | \n", "
| \n", " | age | \n", "genre | \n", "revenu_mensuel | \n", "statut_marital | \n", "departement | \n", "poste | \n", "nombre_experiences_precedentes | \n", "annee_experience_totale | \n", "annees_dans_l_entreprise | \n", "annees_dans_le_poste_actuel | \n", "... | \n", "augementation_salaire_precedente | \n", "attrition | \n", "nombre_participation_pee | \n", "nb_formations_suivies | \n", "distance_domicile_travail | \n", "niveau_education | \n", "domaine_etude | \n", "frequence_deplacement | \n", "annees_depuis_la_derniere_promotion | \n", "annes_sous_responsable_actuel | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "41 | \n", "F | \n", "5993 | \n", "Célibataire | \n", "Commercial | \n", "Cadre Commercial | \n", "8 | \n", "8 | \n", "6 | \n", "4 | \n", "... | \n", "11 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "2 | \n", "Infra & Cloud | \n", "Occasionnel | \n", "0 | \n", "5 | \n", "
| 1 | \n", "49 | \n", "M | \n", "5130 | \n", "Marié(e) | \n", "Consulting | \n", "Assistant de Direction | \n", "1 | \n", "10 | \n", "10 | \n", "7 | \n", "... | \n", "23 | \n", "0 | \n", "1 | \n", "3 | \n", "8 | \n", "1 | \n", "Infra & Cloud | \n", "Frequent | \n", "1 | \n", "7 | \n", "
| 2 | \n", "37 | \n", "M | \n", "2090 | \n", "Célibataire | \n", "Consulting | \n", "Consultant | \n", "6 | \n", "7 | \n", "0 | \n", "0 | \n", "... | \n", "15 | \n", "1 | \n", "0 | \n", "3 | \n", "2 | \n", "2 | \n", "Autre | \n", "Occasionnel | \n", "0 | \n", "0 | \n", "
3 rows × 27 columns
\n", "| \n", " | satisfaction_employee_environnement | \n", "satisfaction_employee_nature_travail | \n", "satisfaction_employee_equipe | \n", "satisfaction_employee_equilibre_pro_perso | \n", "note_evaluation_precedente | \n", "note_evaluation_actuelle | \n", "niveau_education | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "2 | \n", "4 | \n", "1 | \n", "1 | \n", "3 | \n", "3 | \n", "2 | \n", "
| 1 | \n", "3 | \n", "2 | \n", "4 | \n", "3 | \n", "2 | \n", "4 | \n", "1 | \n", "
| 2 | \n", "4 | \n", "3 | \n", "2 | \n", "3 | \n", "2 | \n", "3 | \n", "2 | \n", "
| 3 | \n", "4 | \n", "3 | \n", "3 | \n", "3 | \n", "3 | \n", "3 | \n", "4 | \n", "
| 4 | \n", "1 | \n", "2 | \n", "4 | \n", "3 | \n", "3 | \n", "3 | \n", "1 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 1465 | \n", "3 | \n", "4 | \n", "3 | \n", "3 | \n", "4 | \n", "3 | \n", "2 | \n", "
| 1466 | \n", "4 | \n", "1 | \n", "1 | \n", "3 | \n", "2 | \n", "3 | \n", "1 | \n", "
| 1467 | \n", "2 | \n", "2 | \n", "2 | \n", "3 | \n", "4 | \n", "4 | \n", "3 | \n", "
| 1468 | \n", "4 | \n", "2 | \n", "4 | \n", "2 | \n", "2 | \n", "3 | \n", "3 | \n", "
| 1469 | \n", "2 | \n", "3 | \n", "1 | \n", "4 | \n", "4 | \n", "3 | \n", "3 | \n", "
1470 rows × 7 columns
\n", "| \n", " | genre | \n", "statut_marital | \n", "departement | \n", "poste | \n", "domaine_etude | \n", "
|---|---|---|---|---|---|
| 0 | \n", "F | \n", "Célibataire | \n", "Commercial | \n", "Cadre Commercial | \n", "Infra & Cloud | \n", "
| 1 | \n", "M | \n", "Marié(e) | \n", "Consulting | \n", "Assistant de Direction | \n", "Infra & Cloud | \n", "
| 2 | \n", "M | \n", "Célibataire | \n", "Consulting | \n", "Consultant | \n", "Autre | \n", "
| 3 | \n", "F | \n", "Marié(e) | \n", "Consulting | \n", "Assistant de Direction | \n", "Infra & Cloud | \n", "
| 4 | \n", "M | \n", "Marié(e) | \n", "Consulting | \n", "Consultant | \n", "Transformation Digitale | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 1465 | \n", "M | \n", "Marié(e) | \n", "Consulting | \n", "Consultant | \n", "Transformation Digitale | \n", "
| 1466 | \n", "M | \n", "Marié(e) | \n", "Consulting | \n", "Manager | \n", "Transformation Digitale | \n", "
| 1467 | \n", "M | \n", "Marié(e) | \n", "Consulting | \n", "Tech Lead | \n", "Infra & Cloud | \n", "
| 1468 | \n", "M | \n", "Marié(e) | \n", "Commercial | \n", "Cadre Commercial | \n", "Transformation Digitale | \n", "
| 1469 | \n", "M | \n", "Marié(e) | \n", "Consulting | \n", "Consultant | \n", "Transformation Digitale | \n", "
1470 rows × 5 columns
\n", "| \n", " | genre_F | \n", "genre_M | \n", "statut_marital_Célibataire | \n", "statut_marital_Divorcé(e) | \n", "statut_marital_Marié(e) | \n", "departement_Commercial | \n", "departement_Consulting | \n", "departement_Ressources Humaines | \n", "poste_Assistant de Direction | \n", "poste_Cadre Commercial | \n", "... | \n", "poste_Représentant Commercial | \n", "poste_Ressources Humaines | \n", "poste_Senior Manager | \n", "poste_Tech Lead | \n", "domaine_etude_Autre | \n", "domaine_etude_Entrepreunariat | \n", "domaine_etude_Infra & Cloud | \n", "domaine_etude_Marketing | \n", "domaine_etude_Ressources Humaines | \n", "domaine_etude_Transformation Digitale | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 2 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 3 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 4 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 1465 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
| 1466 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
| 1467 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "
| 1468 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
| 1469 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "
1470 rows × 23 columns
\n", "