Spaces:
Sleeping
Sleeping
Upload 8 files
Browse files
app.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import pandas as pd
|
| 3 |
import eda
|
| 4 |
import prediction
|
| 5 |
import home
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
| 2 |
import eda
|
| 3 |
import prediction
|
| 4 |
import home
|
eda.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
-
import numpy as np
|
| 5 |
import seaborn as sns
|
| 6 |
|
| 7 |
def map_values(arr):
|
|
@@ -125,15 +124,12 @@ def app():
|
|
| 125 |
st.header('Remote Work & Mental Health Prediction Project', divider='rainbow')
|
| 126 |
st.subheader('Exploratory Data Analysis')
|
| 127 |
|
| 128 |
-
|
| 129 |
-
eda_list = ["Numerical Distributions", "Age Groups Chart", "Job Roles Chart", "Satisfaction With Remote Work Chart", "Stress Level Correlations", "Social Isolation Rating Chart", "Stress Level By Job Role", "Mental Health Condition By Job Role", "Productivity Rate By Work Location", "Mental Health Condition By Work Location"]
|
| 130 |
val = st.sidebar.radio("Choose plot to show", eda_list)
|
| 131 |
if val == "Numerical Distributions":
|
| 132 |
numerical_columns = ['Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
|
| 133 |
df[numerical_columns].hist(figsize=(12, 8))
|
| 134 |
-
|
| 135 |
-
# Show the plot
|
| 136 |
-
plt.tight_layout() # Adjust layout
|
| 137 |
plt.suptitle("Numerical values histogram")
|
| 138 |
plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
|
| 139 |
st.pyplot(plt)
|
|
@@ -145,10 +141,6 @@ def app():
|
|
| 145 |
plt.tight_layout()
|
| 146 |
plt.title("Age Group Pie Chart")
|
| 147 |
plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
|
| 148 |
-
|
| 149 |
-
# plt.suptitle('Demographics columns distributions', fontsize=16)
|
| 150 |
-
|
| 151 |
-
# Show the charts
|
| 152 |
st.pyplot(plt)
|
| 153 |
st.write("Insight: The age distribution seems to be quite balanced from the 3 categories, with the most data lies in Mid-career adults (Aged 35-49)")
|
| 154 |
elif val == "Job Roles Chart":
|
|
@@ -158,25 +150,10 @@ def app():
|
|
| 158 |
plt.tight_layout()
|
| 159 |
plt.title("Job Roles Pie Chart")
|
| 160 |
plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
|
| 161 |
-
|
| 162 |
-
# plt.suptitle('Demographics columns distributions', fontsize=16)
|
| 163 |
-
|
| 164 |
-
# Show the charts
|
| 165 |
st.markdown('<div class="center">', unsafe_allow_html=True)
|
| 166 |
st.pyplot(plt)
|
| 167 |
st.markdown('</div>', unsafe_allow_html=True)
|
| 168 |
st.write("Insight: The job roles distribution seem to be quite balanced from the 7 categories, with the most data being Project Manager with 14.8% of the total")
|
| 169 |
-
elif val == "Satisfaction With Remote Work Chart":
|
| 170 |
-
counts = df.groupby(['Access_to_Mental_Health_Resources', 'Satisfaction_with_Remote_Work']).size().unstack()
|
| 171 |
-
# Plotting
|
| 172 |
-
plt.figure(figsize=(10, 6))
|
| 173 |
-
counts.plot(kind='bar', stacked=False, color=['skyblue', 'coral', 'limegreen']) # Customize colors as desired
|
| 174 |
-
plt.title('Satisfaction with Remote Work by Access to Mental Health Resources')
|
| 175 |
-
plt.xlabel('Access to Mental Health Resources')
|
| 176 |
-
plt.ylabel('Count')
|
| 177 |
-
plt.xticks(rotation=0) # Rotate x-axis labels for better readability
|
| 178 |
-
plt.legend(title='Satisfaction with Remote Work')
|
| 179 |
-
st.pyplot(plt)
|
| 180 |
elif val == "Stress Level Correlations":
|
| 181 |
numerical_columns_with_stress_level = ['Stress_Level', 'Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
|
| 182 |
correlation_matrix = df[numerical_columns_with_stress_level].corr()
|
|
@@ -184,28 +161,33 @@ def app():
|
|
| 184 |
sns.heatmap(correlation_matrix, annot=True, cmap='viridis', linewidths=0.5) # Replace 'viridis' with your preferred colormap
|
| 185 |
plt.title('Correlation Matrix of numerical columns and stress level with Heatmap')
|
| 186 |
st.pyplot(plt)
|
|
|
|
| 187 |
elif val == "Social Isolation Rating Chart":
|
| 188 |
-
# Plotting the bar plot for remote work location
|
| 189 |
sns.barplot(x='Access_to_Mental_Health_Resources', y='Social_Isolation_Rating', data=df[df["Work_Location"] == "Remote"])
|
| 190 |
plt.title('Social Isolation Rating for Remote Workers with/without Access to Mental Health Resources')
|
| 191 |
st.pyplot(plt)
|
|
|
|
| 192 |
elif val == "Stress Level By Job Role":
|
| 193 |
plt.figure(figsize=(12,6))
|
| 194 |
sns.countplot(x='Job_Role',hue='Stress_Level',data=df_original)
|
| 195 |
plt.title('Stress Level by Job Role')
|
| 196 |
st.pyplot(plt)
|
|
|
|
| 197 |
elif val == "Mental Health Condition By Job Role":
|
| 198 |
plt.figure(figsize=(15,10))
|
| 199 |
sns.countplot(x='Job_Role', hue='Mental_Health_Condition', data=df)
|
| 200 |
plt.title('Mental Health Condition by Job Role')
|
| 201 |
st.pyplot(plt)
|
|
|
|
| 202 |
elif val == "Productivity Rate By Work Location":
|
| 203 |
plt.figure(figsize=(12,6))
|
| 204 |
sns.countplot(data=df, x='Work_Location', hue='Productivity_Change')
|
| 205 |
plt.title('Rate Of Productivity By Work Location')
|
| 206 |
st.pyplot(plt)
|
|
|
|
| 207 |
elif val == "Mental Health Condition By Work Location":
|
| 208 |
plt.figure(figsize=(12,6))
|
| 209 |
sns.countplot(x='Work_Location',hue='Mental_Health_Condition',data=df)
|
| 210 |
plt.title('Mental Health Condition by Work Location')
|
| 211 |
-
st.pyplot(plt)
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
|
|
|
| 4 |
import seaborn as sns
|
| 5 |
|
| 6 |
def map_values(arr):
|
|
|
|
| 124 |
st.header('Remote Work & Mental Health Prediction Project', divider='rainbow')
|
| 125 |
st.subheader('Exploratory Data Analysis')
|
| 126 |
|
| 127 |
+
eda_list = ["Numerical Distributions", "Age Groups Chart", "Job Roles Chart", "Stress Level Correlations", "Social Isolation Rating Chart", "Stress Level By Job Role", "Mental Health Condition By Job Role", "Productivity Rate By Work Location", "Mental Health Condition By Work Location"]
|
|
|
|
| 128 |
val = st.sidebar.radio("Choose plot to show", eda_list)
|
| 129 |
if val == "Numerical Distributions":
|
| 130 |
numerical_columns = ['Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
|
| 131 |
df[numerical_columns].hist(figsize=(12, 8))
|
| 132 |
+
plt.tight_layout()
|
|
|
|
|
|
|
| 133 |
plt.suptitle("Numerical values histogram")
|
| 134 |
plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
|
| 135 |
st.pyplot(plt)
|
|
|
|
| 141 |
plt.tight_layout()
|
| 142 |
plt.title("Age Group Pie Chart")
|
| 143 |
plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
st.pyplot(plt)
|
| 145 |
st.write("Insight: The age distribution seems to be quite balanced from the 3 categories, with the most data lies in Mid-career adults (Aged 35-49)")
|
| 146 |
elif val == "Job Roles Chart":
|
|
|
|
| 150 |
plt.tight_layout()
|
| 151 |
plt.title("Job Roles Pie Chart")
|
| 152 |
plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
st.markdown('<div class="center">', unsafe_allow_html=True)
|
| 154 |
st.pyplot(plt)
|
| 155 |
st.markdown('</div>', unsafe_allow_html=True)
|
| 156 |
st.write("Insight: The job roles distribution seem to be quite balanced from the 7 categories, with the most data being Project Manager with 14.8% of the total")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
elif val == "Stress Level Correlations":
|
| 158 |
numerical_columns_with_stress_level = ['Stress_Level', 'Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
|
| 159 |
correlation_matrix = df[numerical_columns_with_stress_level].corr()
|
|
|
|
| 161 |
sns.heatmap(correlation_matrix, annot=True, cmap='viridis', linewidths=0.5) # Replace 'viridis' with your preferred colormap
|
| 162 |
plt.title('Correlation Matrix of numerical columns and stress level with Heatmap')
|
| 163 |
st.pyplot(plt)
|
| 164 |
+
st.write("Insight: There does not seem to be much correlations between stress level and numerical features")
|
| 165 |
elif val == "Social Isolation Rating Chart":
|
|
|
|
| 166 |
sns.barplot(x='Access_to_Mental_Health_Resources', y='Social_Isolation_Rating', data=df[df["Work_Location"] == "Remote"])
|
| 167 |
plt.title('Social Isolation Rating for Remote Workers with/without Access to Mental Health Resources')
|
| 168 |
st.pyplot(plt)
|
| 169 |
+
st.write("Insight: It appears that employees with access to mental health resources feel slightly socially more isolated")
|
| 170 |
elif val == "Stress Level By Job Role":
|
| 171 |
plt.figure(figsize=(12,6))
|
| 172 |
sns.countplot(x='Job_Role',hue='Stress_Level',data=df_original)
|
| 173 |
plt.title('Stress Level by Job Role')
|
| 174 |
st.pyplot(plt)
|
| 175 |
+
st.write("Insight: There does not seem to be any role that's too stressed, or least stressed. Everything seems balanced.")
|
| 176 |
elif val == "Mental Health Condition By Job Role":
|
| 177 |
plt.figure(figsize=(15,10))
|
| 178 |
sns.countplot(x='Job_Role', hue='Mental_Health_Condition', data=df)
|
| 179 |
plt.title('Mental Health Condition by Job Role')
|
| 180 |
st.pyplot(plt)
|
| 181 |
+
st.write("Insight: It can be seen that the role with the most burnout and least normal is Data Scientist. All others seem to be quite balanced.")
|
| 182 |
elif val == "Productivity Rate By Work Location":
|
| 183 |
plt.figure(figsize=(12,6))
|
| 184 |
sns.countplot(data=df, x='Work_Location', hue='Productivity_Change')
|
| 185 |
plt.title('Rate Of Productivity By Work Location')
|
| 186 |
st.pyplot(plt)
|
| 187 |
+
st.write("Insight: There does not seem to be any work location that contribute to increase/decrease in productivity change, all seems balanced.")
|
| 188 |
elif val == "Mental Health Condition By Work Location":
|
| 189 |
plt.figure(figsize=(12,6))
|
| 190 |
sns.countplot(x='Work_Location',hue='Mental_Health_Condition',data=df)
|
| 191 |
plt.title('Mental Health Condition by Work Location')
|
| 192 |
+
st.pyplot(plt)
|
| 193 |
+
st.write("Insight: There does not seem to be any work location that contribute to more burnout or more depression, all seems balanced.")
|