Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from sklearn.preprocessing import MinMaxScaler | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import accuracy_score | |
# Load the HR attrition dataset and derive the indicator columns the dashboard reads.
df = pd.read_csv("HR-Employee-Attrition.csv")

# Encode the Yes/No attrition label as 1/0 so it can be averaged into a rate.
attrition_codes = {'Yes': 1, 'No': 0}
df['Attrition'] = df['Attrition'].map(attrition_codes)

# Performance risk: 1 where the performance rating is below 3, else 0.
below_rating_threshold = df['PerformanceRating'].lt(3)
df['Performance_Risk'] = below_rating_threshold.astype(int)

# Retention risk: 1 only where job involvement AND job satisfaction are both below 2.
low_involvement = df['JobInvolvement'].lt(2)
low_satisfaction = df['JobSatisfaction'].lt(2)
df['Retention_Risk'] = (low_involvement & low_satisfaction).astype(int)

# Retention score: min-max scale each selected factor, then average the
# scaled factors row by row.
scaler = MinMaxScaler()
retention_factors = df[['JobSatisfaction', 'EnvironmentSatisfaction', 'WorkLifeBalance', 'YearsAtCompany']]
scaled_factors = scaler.fit_transform(retention_factors)
df['Retention_Score'] = scaled_factors.mean(axis=1)
# Page setup: wide layout and the dashboard title.
st.set_page_config(layout="wide")
st.title("Employee Performance & Retention Analytics Dashboard")

# Sidebar filters: department first, then the job roles that exist in it.
st.sidebar.header("Filters")
selected_dept = st.sidebar.selectbox("Department", df['Department'].unique())
dept_mask = df['Department'] == selected_dept
filtered_jobs = df.loc[dept_mask, 'JobRole'].unique()
selected_job = st.sidebar.selectbox("Job Role", filtered_jobs)
attrition_filter = st.sidebar.radio("Attrition Status", ['All', 'Left', 'Current'])

# Narrow the data to the chosen department and role ...
filtered_df = df[dept_mask & (df['JobRole'] == selected_job)]

# ... and, unless 'All' is selected, to the matching attrition code.
attrition_code_by_choice = {'Left': 1, 'Current': 0}
if attrition_filter in attrition_code_by_choice:
    wanted_code = attrition_code_by_choice[attrition_filter]
    filtered_df = filtered_df[filtered_df['Attrition'] == wanted_code]
# Display Key Metrics
def _format_mean(series, scale=1, suffix=""):
    """Return the mean of *series* formatted to one decimal, or "N/A" if undefined.

    The mean of an empty (or all-NaN) selection is NaN, which would otherwise
    be rendered as "nan" / "nan%" in the metric tiles.

    Args:
        series: Column of the filtered data to average.
        scale: Multiplier applied to the mean before formatting (100 for percentages).
        suffix: Text appended after the number (e.g. "%").
    """
    average = series.mean()
    if pd.isna(average):
        return "N/A"
    return f"{average * scale:.1f}{suffix}"


col1, col2, col3, col4 = st.columns(4)
with col1:
    st.metric("Attrition Rate", _format_mean(filtered_df['Attrition'], scale=100, suffix="%"))
with col2:
    st.metric("Avg Performance Rating", _format_mean(filtered_df['PerformanceRating']))
with col3:
    st.metric("High Retention Risk", _format_mean(filtered_df['Retention_Risk'], scale=100, suffix="%"))
with col4:
    st.metric("Avg Tenure (Years)", _format_mean(filtered_df['YearsAtCompany']))
# Machine Learning Model for Attrition Prediction
# NOTE(review): this section runs at script level, so the forest is refit each
# time the script executes; consider Streamlit's caching decorators if the
# installed Streamlit version provides them.
features = ['Age', 'MonthlyIncome', 'JobSatisfaction', 'EnvironmentSatisfaction', 'WorkLifeBalance', 'YearsAtCompany']
X = df[features]
y = df['Attrition']

# Hold out 20% of the rows to report accuracy; the fixed seeds keep the split
# and the fitted forest reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
st.sidebar.write(f"Model Accuracy: {accuracy*100:.2f}%")

# Predict attrition for the filtered data
if not filtered_df.empty:
    # filtered_df is the result of boolean indexing on df. Take an explicit
    # copy before adding a column so pandas does not emit
    # SettingWithCopyWarning and the write is unambiguously local.
    filtered_df = filtered_df.copy()
    filtered_df['Predicted Attrition'] = model.predict(filtered_df[features])
    # Mean of the predicted 0/1 labels, i.e. the share of the selection the
    # model predicts will leave, expressed as a percentage.
    # NOTE(review): the selection can include rows the model was trained on,
    # so this figure is likely optimistic.
    attrition_probability = filtered_df['Predicted Attrition'].mean() * 100
else:
    attrition_probability = 0
# Create Tabs for Visualizations and Explanations
tab1, tab2, tab3 = st.tabs(["Performance Analysis", "Retention Analysis", "Employee Evaluation"])

# -------------------------------
# Tab 1: Performance Analysis
# -------------------------------
with tab1:
    st.header("Performance Analysis")

    # Attrition is stored as 1/0. Plot from a labelled copy so the box-plot
    # legend reads Left/Stayed, matching the explanation text, instead of 1/0.
    perf_plot_df = filtered_df.copy()
    perf_plot_df['Attrition Status'] = perf_plot_df['Attrition'].map({1: 'Left', 0: 'Stayed'})

    col_left, col_right = st.columns(2)
    with col_left:
        st.subheader("Performance Rating vs Monthly Income")
        fig1 = px.box(
            perf_plot_df,
            x='PerformanceRating',
            y='MonthlyIncome',
            color='Attrition Status',
            title="Performance vs Income"
        )
        st.plotly_chart(fig1, use_container_width=True)
        st.markdown(
            """
            **Explanation:**
            - **X-axis:** Performance Rating
            - **Y-axis:** Monthly Income
            - **Color:** Attrition status (Left/Stayed)
            This box plot shows the distribution of salaries across different performance ratings. It can highlight if high-performing employees are under-compensated, potentially driving attrition.
            """
        )
    with col_right:
        st.subheader("Tenure vs Age Attrition Risk")
        # The heatmap averages the numeric 1/0 attrition flag per cell, so it
        # keeps using the original numeric column.
        fig2 = px.density_heatmap(
            filtered_df,
            x='YearsAtCompany',
            y='Age',
            z='Attrition',
            histfunc="avg",
            title="Tenure vs Age Attrition Risk"
        )
        st.plotly_chart(fig2, use_container_width=True)
        st.markdown(
            """
            **Explanation:**
            - **X-axis:** Years at Company
            - **Y-axis:** Age
            - **Color Intensity:** Attrition likelihood
            This heatmap correlates employees' age and tenure with their attrition risk. Darker areas suggest higher likelihoods of attrition, identifying groups that may need targeted retention efforts.
            """
        )
# -------------------------------
# Tab 2: Retention Analysis
# -------------------------------
with tab2:
    st.header("Retention Analysis")

    # Attrition is stored as 1/0; a numeric colour column gives the scatter a
    # continuous colour bar. Plot from a labelled copy so the two groups get
    # distinct colours and a Left/Stayed legend.
    retention_plot_df = filtered_df.copy()
    retention_plot_df['Attrition Status'] = retention_plot_df['Attrition'].map({1: 'Left', 0: 'Stayed'})

    col_left, col_right = st.columns(2)
    with col_left:
        st.subheader("Job Satisfaction vs Work-Life Balance")
        fig3 = px.scatter(
            retention_plot_df,
            x='JobSatisfaction',
            y='WorkLifeBalance',
            color='Attrition Status',
            size='YearsAtCompany',
            title="Satisfaction vs Retention"
        )
        st.plotly_chart(fig3, use_container_width=True)
        st.markdown(
            """
            **Explanation:**
            - **X-axis:** Job Satisfaction
            - **Y-axis:** Work-Life Balance
            - **Bubble Size:** Years at Company
            - **Color:** Attrition status
            This scatter plot helps visualize how job satisfaction and work-life balance correlate with attrition. Clusters in lower satisfaction and balance areas can indicate higher attrition risks.
            """
        )
    with col_right:
        st.subheader("Promotion History Impact")
        left_mask = filtered_df['Attrition'] == 1
        stayed_mask = filtered_df['Attrition'] == 0
        fig4 = go.Figure()
        fig4.add_trace(
            go.Histogram(
                x=filtered_df.loc[left_mask, 'YearsSinceLastPromotion'],
                name='Left'
            )
        )
        fig4.add_trace(
            go.Histogram(
                x=filtered_df.loc[stayed_mask, 'YearsSinceLastPromotion'],
                name='Stayed'
            )
        )
        fig4.update_layout(title="Promotion History Impact", barmode='overlay')
        # Overlaid bars are drawn on top of each other; without reduced
        # opacity the second trace hides the first.
        fig4.update_traces(opacity=0.75)
        st.plotly_chart(fig4, use_container_width=True)
        st.markdown(
            """
            **Explanation:**
            - **X-axis:** Years Since Last Promotion
            - **Bars:** Comparison between employees who left and those who stayed
            This histogram examines whether long gaps since the last promotion correlate with higher attrition, suggesting that career stagnation might drive employees to leave.
            """
        )
# -------------------------------
# Tab 3: Employee Evaluation
# -------------------------------
with tab3:
    st.header("Employee Evaluation")
    if filtered_df.empty:
        # Nothing to pick from when the filters match no rows.
        st.write("No employee data available for the selected filters.")
    else:
        selected_employee = st.selectbox("Select Employee", filtered_df['EmployeeNumber'])
        matching_rows = filtered_df[filtered_df['EmployeeNumber'] == selected_employee]
        emp_data = matching_rows.iloc[0]

        col_left, col_right = st.columns(2)
        with col_left:
            st.subheader("Employee Metrics")
            # Display label -> source column for the profile bar chart.
            metric_columns = {
                'Performance Rating': 'PerformanceRating',
                'Job Satisfaction': 'JobSatisfaction',
                'Work-Life Balance': 'WorkLifeBalance',
                'Retention Score': 'Retention_Score',
            }
            metrics = {label: emp_data[column] for label, column in metric_columns.items()}
            profile_bar = go.Bar(
                x=list(metrics.values()),
                y=list(metrics.keys()),
                orientation='h'
            )
            fig5 = go.Figure(profile_bar)
            fig5.update_layout(title="Employee Profile")
            st.plotly_chart(fig5, use_container_width=True)
            st.markdown(
                """
                **Explanation:**
                This bar chart visualizes the key metrics for the selected employee, highlighting performance, satisfaction, work-life balance, and overall retention score.
                """
            )
        with col_right:
            st.subheader("Retention Recommendation")
            # Each entry pairs a threshold check with the label shown when it trips.
            risk_checks = [
                (emp_data['PerformanceRating'] < 3, "Low Performance"),
                (emp_data['YearsSinceLastPromotion'] > 3, "Stagnant Position"),
                (emp_data['WorkLifeBalance'] < 2, "Poor Work-Life Balance"),
            ]
            risk_factors = [label for tripped, label in risk_checks if tripped]

            # The retention score is shown as a percentage and bucketed at 70 / 40.
            retention_prob = emp_data['Retention_Score'] * 100
            st.metric("Retention Probability", f"{retention_prob:.1f}%")
            if retention_prob > 70:
                st.success("High Retention Potential - Recommend Retention Programs")
            elif retention_prob > 40:
                st.warning("Moderate Retention Risk - Monitor Closely")
            else:
                st.error("High Attrition Risk - Recommend Intervention")

            if risk_factors:
                st.write("**Key Risk Factors:**")
                for risk_label in risk_factors:
                    st.write(f"- {risk_label}")
            st.markdown(
                """
                **Explanation:**
                This section provides tailored retention recommendations based on the employee's metrics and key risk factors. The displayed retention probability guides whether further intervention is needed.
                """
            )
# -------------------------------
# AI-Powered Insights Section
# -------------------------------
st.header("AI-Powered Insights")
if st.button("Generate Department Insights"):
    # The mean of an empty selection is NaN and would be printed as "nan%";
    # show "N/A" for the selection's rate in that case.
    selection_rate = filtered_df['Attrition'].mean()
    if pd.isna(selection_rate):
        selection_rate_text = "N/A"
    else:
        selection_rate_text = f"{selection_rate*100:.1f}%"

    insights = f"""
    **Department Insights for {selected_dept} - {selected_job}:**
    - Overall Attrition Rate: {selection_rate_text} compared to the company average: {df['Attrition'].mean()*100:.1f}%.
    - Employees with lower job satisfaction and poor work-life balance tend to have higher attrition risk.
    - Predicted attrition probability based on the ML model: {attrition_probability:.1f}%.
    """
    st.write(insights)