zaid002 committed on
Commit
7a34791
·
verified ·
1 Parent(s): 05bcc01

Upload employee_attribute.py

Browse files
Files changed (1) hide show
  1. employee_attribute.py +215 -0
employee_attribute.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """employee-attribute.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1eSxTWsZdcxggnTg1ErD9yUiChlR0ko4t
8
+ """
9
+
10
+ import pandas as pd
11
+ pd.read_csv("/content/Employee-Attrition - Employee-Attrition.csv")
12
+
13
+ """## data preprocessing"""
14
+
15
+ # employee_attrition_preprocessing.py
16
+ import pandas as pd
17
+ from sklearn.preprocessing import LabelEncoder
18
+
19
+ # Load dataset
20
+ data = pd.read_csv("/content/Employee-Attrition - Employee-Attrition.csv")
21
+
22
+ # Drop constant or irrelevant columns
23
+ data.drop(['EmployeeCount', 'EmployeeNumber', 'Over18', 'StandardHours'], axis=1, inplace=True)
24
+
25
+ # Encode categorical variables
26
+ label = LabelEncoder()
27
+ for col in data.select_dtypes(include=['object']).columns:
28
+ data[col] = label.fit_transform(data[col])
29
+
30
+ # Handle missing values (if any)
31
+ data.fillna(data.median(), inplace=True)
32
+
33
+ print("✅ Data preprocessing complete. Shape:", data.shape)
34
+ data.to_csv("cleaned_employee_data.csv", index=False)
35
+
36
+ """## EDA"""
37
+
38
+ # employee_attrition_eda.py
39
+ import pandas as pd
40
+ import matplotlib.pyplot as plt
41
+ import seaborn as sns
42
+
43
+ data = pd.read_csv("cleaned_employee_data.csv")
44
+
45
+ # Attrition distribution
46
+ sns.countplot(x='Attrition', data=data)
47
+ plt.title("Attrition Count (0 = Stay, 1 = Leave)")
48
+ plt.show()
49
+
50
+ # Correlation heatmap
51
+ plt.figure(figsize=(10,6))
52
+ sns.heatmap(data.corr(), cmap="coolwarm")
53
+ plt.title("Feature Correlation Heatmap")
54
+ plt.show()
55
+
56
+ # Example: relationship between JobSatisfaction and Attrition
57
+ sns.boxplot(x='Attrition', y='JobSatisfaction', data=data)
58
+ plt.title("Job Satisfaction vs Attrition")
59
+ plt.show()
60
+
61
+ """## MODEL BUILDING EVALUATION"""
62
+
63
+ # employee_attrition_model.py
64
+ import pandas as pd
65
+ from sklearn.model_selection import train_test_split
66
+ from sklearn.ensemble import RandomForestClassifier
67
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
68
+ import joblib
69
+
70
+ # Load data
71
+ data = pd.read_csv("cleaned_employee_data.csv")
72
+
73
+ X = data.drop("Attrition", axis=1)
74
+ y = data["Attrition"]
75
+
76
+ # Split data
77
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
78
+
79
+ # Train model
80
+ model = RandomForestClassifier(n_estimators=100, random_state=42)
81
+ model.fit(X_train, y_train)
82
+
83
+ # Predictions
84
+ y_pred = model.predict(X_test)
85
+
86
+ # Evaluate
87
+ print("Accuracy:", accuracy_score(y_test, y_pred))
88
+ print("\nClassification Report:\n", classification_report(y_test, y_pred))
89
+ print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
90
+
91
+ # Save model
92
+ joblib.dump(model, "employee_attrition_model.pkl")
93
+ print("✅ Model saved successfully!")
94
+
95
+ """## streamlit app prediction"""
96
+
97
+ # streamlit_app.py
98
+ import streamlit as st
99
+ import pandas as pd
100
+ import joblib
101
+
102
+ st.title("👩‍💼 Employee Attrition Prediction")
103
+
104
+ # Load model
105
+ model = joblib.load("employee_attrition_model.pkl")
106
+
107
+ # Input form
108
+ st.header("Enter Employee Details:")
109
+
110
+ age = st.number_input("Age", 18, 60)
111
+ monthly_income = st.number_input("Monthly Income", 1000, 20000)
112
+ job_satisfaction = st.slider("Job Satisfaction (1–4)", 1, 4)
113
+ work_life_balance = st.slider("Work-Life Balance (1–4)", 1, 4)
114
+ years_at_company = st.number_input("Years at Company", 0, 40)
115
+ overtime = st.selectbox("OverTime", ["Yes", "No"])
116
+
117
+ # Convert to numeric
118
+ overtime_value = 1 if overtime == "Yes" else 0
119
+
120
+ # Prepare input
121
+ input_data = pd.DataFrame({
122
+ 'Age': [age],
123
+ 'MonthlyIncome': [monthly_income],
124
+ 'JobSatisfaction': [job_satisfaction],
125
+ 'WorkLifeBalance': [work_life_balance],
126
+ 'YearsAtCompany': [years_at_company],
127
+ 'OverTime': [overtime_value]
128
+ })
129
+
130
+ # Prediction
131
+ if st.button("Predict Attrition"):
132
+ prediction = model.predict(input_data)[0]
133
+ if prediction == 1:
134
+ st.error("⚠️ This employee is likely to leave the company.")
135
+ else:
136
+ st.success("✅ This employee is likely to stay.")
137
+
138
+ !pip install streamlit
139
+
140
+ !pip install -r requirements.txt
141
+
142
+ streamlit run streamlit_app.py
143
+
144
+ # Commented out IPython magic to ensure Python compatibility.
145
+ # %%writefile streamlit_app.py
146
+ # import streamlit as st
147
+ # import pandas as pd
148
+ # import joblib
149
+ #
150
+ # st.title("👩‍💼 Employee Attrition Prediction")
151
+ #
152
+ # # Load model
153
+ # model = joblib.load("employee_attrition_model.pkl")
154
+ #
155
+ # # Input form
156
+ # st.header("Enter Employee Details:")
157
+ #
158
+ # age = st.number_input("Age", 18, 60)
159
+ # monthly_income = st.number_input("Monthly Income", 1000, 20000)
160
+ # job_satisfaction = st.slider("Job Satisfaction (1–4)", 1, 4)
161
+ # work_life_balance = st.slider("Work-Life Balance (1–4)", 1, 4)
162
+ # years_at_company = st.number_input("Years at Company", 0, 40)
163
+ # overtime = st.selectbox("OverTime", ["Yes", "No"])
164
+ #
165
+ # # Convert to numeric
166
+ # overtime_value = 1 if overtime == "Yes" else 0
167
+ #
168
+ # # Prepare input
169
+ # input_data = pd.DataFrame({
170
+ # 'Age': [age],
171
+ # 'MonthlyIncome': [monthly_income],
172
+ # 'JobSatisfaction': [job_satisfaction],
173
+ # 'WorkLifeBalance': [work_life_balance],
174
+ # 'YearsAtCompany': [years_at_company],
175
+ # 'OverTime': [overtime_value]
176
+ # })
177
+ #
178
+ # # Prediction
179
+ # if st.button("Predict Attrition"):
180
+ # prediction = model.predict(input_data)[0]
181
+ # if prediction == 1:
182
+ # st.error("⚠️ This employee is likely to leave the company.")
182
+ # else:
183
+ # st.success("✅ This employee is likely to stay.")
185
+
186
+ !streamlit run streamlit_app.py
187
+
188
+ """## AU-ROC score"""
189
+
190
+ from sklearn.metrics import roc_auc_score
191
+ roc = roc_auc_score(y_test, y_pred)
192
+ print("AUC-ROC:", roc)
193
+
194
+ !pip install streamlit
195
+
196
+ # train_model.py
197
+ import pandas as pd
198
+ from sklearn.model_selection import train_test_split
199
+ from sklearn.ensemble import RandomForestClassifier
200
+ import joblib
201
+
202
+ # Example: load your HR dataset
203
+ data = pd.read_csv("cleaned_employee_data.csv")
204
+
205
+ X = data.drop("Attrition", axis=1)
206
+ y = data["Attrition"]
207
+
208
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
209
+
210
+ model = RandomForestClassifier(n_estimators=100, random_state=42)
211
+ model.fit(X_train, y_train)
212
+
213
+ joblib.dump(model, "employee_attrition_model.pkl")
214
+ print("✅ Model saved successfully!")
215
+