Spaces:

zaid002
/

employee-attributes

Sleeping

App Files Files Community

zaid002 committed on Oct 16, 2025

Commit

7a34791

verified ·

1 Parent(s): 05bcc01

Upload employee_attribute.py

Browse files

Files changed (1) hide show

employee_attribute.py +215 -0

employee_attribute.py ADDED Viewed

	@@ -0,0 +1,215 @@

+# -*- coding: utf-8 -*-
+"""employee-attribute.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1eSxTWsZdcxggnTg1ErD9yUiChlR0ko4t
+"""
+import pandas as pd
+pd.read_csv("/content/Employee-Attrition - Employee-Attrition.csv")
+"""## data preprocessing"""
+# employee_attrition_preprocessing.py
+import pandas as pd
+from sklearn.preprocessing import LabelEncoder
+# Load dataset
+data = pd.read_csv("/content/Employee-Attrition - Employee-Attrition.csv")
+# Drop constant or irrelevant columns
+data.drop(['EmployeeCount', 'EmployeeNumber', 'Over18', 'StandardHours'], axis=1, inplace=True)
+# Encode categorical variables
+label = LabelEncoder()
+for col in data.select_dtypes(include=['object']).columns:
+    data[col] = label.fit_transform(data[col])
+# Handle missing values (if any)
+data.fillna(data.median(), inplace=True)
+print("✅ Data preprocessing complete. Shape:", data.shape)
+data.to_csv("cleaned_employee_data.csv", index=False)
+"""## EDA"""
+# employee_attrition_eda.py
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+data = pd.read_csv("cleaned_employee_data.csv")
+# Attrition distribution
+sns.countplot(x='Attrition', data=data)
+plt.title("Attrition Count (0 = Stay, 1 = Leave)")
+plt.show()
+# Correlation heatmap
+plt.figure(figsize=(10,6))
+sns.heatmap(data.corr(), cmap="coolwarm")
+plt.title("Feature Correlation Heatmap")
+plt.show()
+# Example: relationship between JobSatisfaction and Attrition
+sns.boxplot(x='Attrition', y='JobSatisfaction', data=data)
+plt.title("Job Satisfaction vs Attrition")
+plt.show()
+"""## MODEL BUILDING EVALUATION"""
+# employee_attrition_model.py
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+import joblib
+# Load data
+data = pd.read_csv("cleaned_employee_data.csv")
+X = data.drop("Attrition", axis=1)
+y = data["Attrition"]
+# Split data
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+# Train model
+model = RandomForestClassifier(n_estimators=100, random_state=42)
+model.fit(X_train, y_train)
+# Predictions
+y_pred = model.predict(X_test)
+# Evaluate
+print("Accuracy:", accuracy_score(y_test, y_pred))
+print("\nClassification Report:\n", classification_report(y_test, y_pred))
+print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
+# Save model
+joblib.dump(model, "employee_attrition_model.pkl")
+print("✅ Model saved successfully!")
+"""## streamlit app prediction"""
+# streamlit_app.py
+import streamlit as st
+import pandas as pd
+import joblib
+st.title("👩‍💼 Employee Attrition Prediction")
+# Load model
+model = joblib.load("employee_attrition_model.pkl")
+# Input form
+st.header("Enter Employee Details:")
+age = st.number_input("Age", 18, 60)
+monthly_income = st.number_input("Monthly Income", 1000, 20000)
+job_satisfaction = st.slider("Job Satisfaction (1–4)", 1, 4)
+work_life_balance = st.slider("Work-Life Balance (1–4)", 1, 4)
+years_at_company = st.number_input("Years at Company", 0, 40)
+overtime = st.selectbox("OverTime", ["Yes", "No"])
+# Convert to numeric
+overtime_value = 1 if overtime == "Yes" else 0
+# Prepare input
+input_data = pd.DataFrame({
+    'Age': [age],
+    'MonthlyIncome': [monthly_income],
+    'JobSatisfaction': [job_satisfaction],
+    'WorkLifeBalance': [work_life_balance],
+    'YearsAtCompany': [years_at_company],
+    'OverTime': [overtime_value]
+})
+# Prediction
+if st.button("Predict Attrition"):
+    prediction = model.predict(input_data)[0]
+    if prediction == 1:
+        st.error("⚠️ This employee is likely to leave the company.")
+    else:
+        st.success("✅ This employee is likely to stay.")
+!pip install streamlit
+!pip install -r requirements.txt
+streamlit run streamlit_app.py
+# Commented out IPython magic to ensure Python compatibility.
+# %%writefile streamlit_app.py
+# import streamlit as st
+# import pandas as pd
+# import joblib
+#
+# st.title("👩‍💼 Employee Attrition Prediction")
+#
+# # Load model
+# model = joblib.load("employee_attrition_model.pkl")
+#
+# # Input form
+# st.header("Enter Employee Details:")
+#
+# age = st.number_input("Age", 18, 60)
+# monthly_income = st.number_input("Monthly Income", 1000, 20000)
+# job_satisfaction = st.slider("Job Satisfaction (1–4)", 1, 4)
+# work_life_balance = st.slider("Work-Life Balance (1–4)", 1, 4)
+# years_at_company = st.number_input("Years at Company", 0, 40)
+# overtime = st.selectbox("OverTime", ["Yes", "No"])
+#
+# # Convert to numeric
+# overtime_value = 1 if overtime == "Yes" else 0
+#
+# # Prepare input
+# input_data = pd.DataFrame({
+#     'Age': [age],
+#     'MonthlyIncome': [monthly_income],
+#     'JobSatisfaction': [job_satisfaction],
+#     'WorkLifeBalance': [work_life_balance],
+#     'YearsAtCompany': [years_at_company],
+#     'OverTime': [overtime_value]
+# })
+#
+# # Prediction
+# if st.button("Predict Attrition"):
+#     prediction = model.predict(input_data)[0]
+#     if prediction == 1:
+#         st.error("⚠️ This employee is likely to leave the company.")
+#     else:
+#         st.success("✅ This employee is likely to stay.")
+!streamlit run streamlit_app.py
+"""## AU-ROC score"""
+from sklearn.metrics import roc_auc_score
+roc = roc_auc_score(y_test, y_pred)
+print("AUC-ROC:", roc)
+!pip install streamlit
+# train_model.py
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+import joblib
+# Example: load your HR dataset
+data = pd.read_csv("cleaned_employee_data.csv")
+X = data.drop("Attrition", axis=1)
+y = data["Attrition"]
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+model = RandomForestClassifier(n_estimators=100, random_state=42)
+model.fit(X_train, y_train)
+joblib.dump(model, "employee_attrition_model.pkl")
+print("✅ Model saved successfully!")