File size: 4,935 Bytes
0e2c35a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Location of the HR attrition CSV that this notebook analyses (fetched over HTTPS).\n",
    "# NOTE(review): the file carries an unnamed leading column that pandas loads as\n",
    "# 'Unnamed: 0'; consider index_col=0 if that column is only a saved row index.\n",
    "file_path = 'https://raw.githubusercontent.com/aaubs/ds-master/main/apps/M1-attrition-streamlit/HR-Employee-Attrition-synth.csv'\n",
    "\n",
    "# `data` is the DataFrame that every later cell inspects and aggregates.\n",
    "data = pd.read_csv(filepath_or_buffer=file_path)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 2000 entries, 0 to 1999\n",
      "Data columns (total 36 columns):\n",
      " #   Column                    Non-Null Count  Dtype \n",
      "---  ------                    --------------  ----- \n",
      " 0   Unnamed: 0                2000 non-null   int64 \n",
      " 1   Age                       2000 non-null   int64 \n",
      " 2   Attrition                 2000 non-null   object\n",
      " 3   BusinessTravel            2000 non-null   object\n",
      " 4   DailyRate                 2000 non-null   int64 \n",
      " 5   Department                2000 non-null   object\n",
      " 6   DistanceFromHome          2000 non-null   int64 \n",
      " 7   Education                 2000 non-null   int64 \n",
      " 8   EducationField            2000 non-null   object\n",
      " 9   EmployeeCount             2000 non-null   int64 \n",
      " 10  EmployeeNumber            2000 non-null   int64 \n",
      " 11  EnvironmentSatisfaction   2000 non-null   int64 \n",
      " 12  Gender                    2000 non-null   object\n",
      " 13  HourlyRate                2000 non-null   int64 \n",
      " 14  JobInvolvement            2000 non-null   int64 \n",
      " 15  JobLevel                  2000 non-null   int64 \n",
      " 16  JobRole                   2000 non-null   object\n",
      " 17  JobSatisfaction           2000 non-null   int64 \n",
      " 18  MaritalStatus             2000 non-null   object\n",
      " 19  MonthlyIncome             2000 non-null   int64 \n",
      " 20  MonthlyRate               2000 non-null   int64 \n",
      " 21  NumCompaniesWorked        2000 non-null   int64 \n",
      " 22  Over18                    2000 non-null   object\n",
      " 23  OverTime                  2000 non-null   object\n",
      " 24  PercentSalaryHike         2000 non-null   int64 \n",
      " 25  PerformanceRating         2000 non-null   int64 \n",
      " 26  RelationshipSatisfaction  2000 non-null   int64 \n",
      " 27  StandardHours             2000 non-null   int64 \n",
      " 28  StockOptionLevel          2000 non-null   int64 \n",
      " 29  TotalWorkingYears         2000 non-null   int64 \n",
      " 30  TrainingTimesLastYear     2000 non-null   int64 \n",
      " 31  WorkLifeBalance           2000 non-null   int64 \n",
      " 32  YearsAtCompany            2000 non-null   int64 \n",
      " 33  YearsInCurrentRole        2000 non-null   int64 \n",
      " 34  YearsSinceLastPromotion   2000 non-null   int64 \n",
      " 35  YearsWithCurrManager      2000 non-null   int64 \n",
      "dtypes: int64(27), object(9)\n",
      "memory usage: 562.6+ KB\n"
     ]
    }
   ],
   "source": [
    "# Structural overview of `data`: column names, non-null counts, dtypes and memory usage.\n",
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Attrition\n",
       "No     8033.23208\n",
       "Yes    8676.02349\n",
       "Name: MonthlyIncome, dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Average MonthlyIncome for each Attrition value.\n",
    "(\n",
    "    data\n",
    "    .groupby('Attrition')['MonthlyIncome']\n",
    "    .mean()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Attrition  Department            \n",
       "No         Human Resources           8212.311828\n",
       "           Research & Development    8018.842391\n",
       "           Sales                     8028.274311\n",
       "Yes        Human Resources           7557.600000\n",
       "           Research & Development    8908.348315\n",
       "           Sales                     8401.754545\n",
       "Name: MonthlyIncome, dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Average MonthlyIncome for each (Attrition, Department) combination.\n",
    "(\n",
    "    data\n",
    "    .groupby(['Attrition', 'Department'])['MonthlyIncome']\n",
    "    .mean()\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}