stanlys96 committed on
Commit
26605a1
·
verified ·
1 Parent(s): f9c43fa

Upload 8 files

Browse files
Files changed (2) hide show
  1. app.py +0 -1
  2. eda.py +9 -27
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import streamlit as st
2
- import pandas as pd
3
  import eda
4
  import prediction
5
  import home
 
1
  import streamlit as st
 
2
  import eda
3
  import prediction
4
  import home
eda.py CHANGED
@@ -1,7 +1,6 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
- import numpy as np
5
  import seaborn as sns
6
 
7
  def map_values(arr):
@@ -125,15 +124,12 @@ def app():
125
  st.header('Remote Work & Mental Health Prediction Project', divider='rainbow')
126
  st.subheader('Exploratory Data Analysis')
127
 
128
- # selection = st.sidebar.selectbox("Go to", list(PAGES.keys()))
129
- eda_list = ["Numerical Distributions", "Age Groups Chart", "Job Roles Chart", "Satisfaction With Remote Work Chart", "Stress Level Correlations", "Social Isolation Rating Chart", "Stress Level By Job Role", "Mental Health Condition By Job Role", "Productivity Rate By Work Location", "Mental Health Condition By Work Location"]
130
  val = st.sidebar.radio("Choose plot to show", eda_list)
131
  if val == "Numerical Distributions":
132
  numerical_columns = ['Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
133
  df[numerical_columns].hist(figsize=(12, 8))
134
-
135
- # Show the plot
136
- plt.tight_layout() # Adjust layout
137
  plt.suptitle("Numerical values histogram")
138
  plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
139
  st.pyplot(plt)
@@ -145,10 +141,6 @@ def app():
145
  plt.tight_layout()
146
  plt.title("Age Group Pie Chart")
147
  plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
148
-
149
- # plt.suptitle('Demographics columns distributions', fontsize=16)
150
-
151
- # Show the charts
152
  st.pyplot(plt)
153
  st.write("Insight: The age distribution seems to be quite balanced from the 3 categories, with the most data lies in Mid-career adults (Aged 35-49)")
154
  elif val == "Job Roles Chart":
@@ -158,25 +150,10 @@ def app():
158
  plt.tight_layout()
159
  plt.title("Job Roles Pie Chart")
160
  plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
161
-
162
- # plt.suptitle('Demographics columns distributions', fontsize=16)
163
-
164
- # Show the charts
165
  st.markdown('<div class="center">', unsafe_allow_html=True)
166
  st.pyplot(plt)
167
  st.markdown('</div>', unsafe_allow_html=True)
168
  st.write("Insight: The job roles distribution seem to be quite balanced from the 7 categories, with the most data being Project Manager with 14.8% of the total")
169
- elif val == "Satisfaction With Remote Work Chart":
170
- counts = df.groupby(['Access_to_Mental_Health_Resources', 'Satisfaction_with_Remote_Work']).size().unstack()
171
- # Plotting
172
- plt.figure(figsize=(10, 6))
173
- counts.plot(kind='bar', stacked=False, color=['skyblue', 'coral', 'limegreen']) # Customize colors as desired
174
- plt.title('Satisfaction with Remote Work by Access to Mental Health Resources')
175
- plt.xlabel('Access to Mental Health Resources')
176
- plt.ylabel('Count')
177
- plt.xticks(rotation=0) # Rotate x-axis labels for better readability
178
- plt.legend(title='Satisfaction with Remote Work')
179
- st.pyplot(plt)
180
  elif val == "Stress Level Correlations":
181
  numerical_columns_with_stress_level = ['Stress_Level', 'Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
182
  correlation_matrix = df[numerical_columns_with_stress_level].corr()
@@ -184,28 +161,33 @@ def app():
184
  sns.heatmap(correlation_matrix, annot=True, cmap='viridis', linewidths=0.5) # Replace 'viridis' with your preferred colormap
185
  plt.title('Correlation Matrix of numerical columns and stress level with Heatmap')
186
  st.pyplot(plt)
 
187
  elif val == "Social Isolation Rating Chart":
188
- # Plotting the bar plot for remote work location
189
  sns.barplot(x='Access_to_Mental_Health_Resources', y='Social_Isolation_Rating', data=df[df["Work_Location"] == "Remote"])
190
  plt.title('Social Isolation Rating for Remote Workers with/without Access to Mental Health Resources')
191
  st.pyplot(plt)
 
192
  elif val == "Stress Level By Job Role":
193
  plt.figure(figsize=(12,6))
194
  sns.countplot(x='Job_Role',hue='Stress_Level',data=df_original)
195
  plt.title('Stress Level by Job Role')
196
  st.pyplot(plt)
 
197
  elif val == "Mental Health Condition By Job Role":
198
  plt.figure(figsize=(15,10))
199
  sns.countplot(x='Job_Role', hue='Mental_Health_Condition', data=df)
200
  plt.title('Mental Health Condition by Job Role')
201
  st.pyplot(plt)
 
202
  elif val == "Productivity Rate By Work Location":
203
  plt.figure(figsize=(12,6))
204
  sns.countplot(data=df, x='Work_Location', hue='Productivity_Change')
205
  plt.title('Rate Of Productivity By Work Location')
206
  st.pyplot(plt)
 
207
  elif val == "Mental Health Condition By Work Location":
208
  plt.figure(figsize=(12,6))
209
  sns.countplot(x='Work_Location',hue='Mental_Health_Condition',data=df)
210
  plt.title('Mental Health Condition by Work Location')
211
- st.pyplot(plt)
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
 
4
  import seaborn as sns
5
 
6
  def map_values(arr):
 
124
  st.header('Remote Work & Mental Health Prediction Project', divider='rainbow')
125
  st.subheader('Exploratory Data Analysis')
126
 
127
+ eda_list = ["Numerical Distributions", "Age Groups Chart", "Job Roles Chart", "Stress Level Correlations", "Social Isolation Rating Chart", "Stress Level By Job Role", "Mental Health Condition By Job Role", "Productivity Rate By Work Location", "Mental Health Condition By Work Location"]
 
128
  val = st.sidebar.radio("Choose plot to show", eda_list)
129
  if val == "Numerical Distributions":
130
  numerical_columns = ['Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
131
  df[numerical_columns].hist(figsize=(12, 8))
132
+ plt.tight_layout()
 
 
133
  plt.suptitle("Numerical values histogram")
134
  plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
135
  st.pyplot(plt)
 
141
  plt.tight_layout()
142
  plt.title("Age Group Pie Chart")
143
  plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
 
 
 
 
144
  st.pyplot(plt)
145
  st.write("Insight: The age distribution seems to be quite balanced from the 3 categories, with the most data lies in Mid-career adults (Aged 35-49)")
146
  elif val == "Job Roles Chart":
 
150
  plt.tight_layout()
151
  plt.title("Job Roles Pie Chart")
152
  plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
 
 
 
 
153
  st.markdown('<div class="center">', unsafe_allow_html=True)
154
  st.pyplot(plt)
155
  st.markdown('</div>', unsafe_allow_html=True)
156
  st.write("Insight: The job roles distribution seem to be quite balanced from the 7 categories, with the most data being Project Manager with 14.8% of the total")
 
 
 
 
 
 
 
 
 
 
 
157
  elif val == "Stress Level Correlations":
158
  numerical_columns_with_stress_level = ['Stress_Level', 'Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
159
  correlation_matrix = df[numerical_columns_with_stress_level].corr()
 
161
  sns.heatmap(correlation_matrix, annot=True, cmap='viridis', linewidths=0.5) # Replace 'viridis' with your preferred colormap
162
  plt.title('Correlation Matrix of numerical columns and stress level with Heatmap')
163
  st.pyplot(plt)
164
+ st.write("Insight: There does not seem to be much correlations between stress level and numerical features")
165
  elif val == "Social Isolation Rating Chart":
 
166
  sns.barplot(x='Access_to_Mental_Health_Resources', y='Social_Isolation_Rating', data=df[df["Work_Location"] == "Remote"])
167
  plt.title('Social Isolation Rating for Remote Workers with/without Access to Mental Health Resources')
168
  st.pyplot(plt)
169
+ st.write("Insight: It appears that employees with access to mental health resources feel slightly socially more isolated")
170
  elif val == "Stress Level By Job Role":
171
  plt.figure(figsize=(12,6))
172
  sns.countplot(x='Job_Role',hue='Stress_Level',data=df_original)
173
  plt.title('Stress Level by Job Role')
174
  st.pyplot(plt)
175
+ st.write("Insight: There does not seem to be any role that's too stressed, or least stressed. Everything seems balanced.")
176
  elif val == "Mental Health Condition By Job Role":
177
  plt.figure(figsize=(15,10))
178
  sns.countplot(x='Job_Role', hue='Mental_Health_Condition', data=df)
179
  plt.title('Mental Health Condition by Job Role')
180
  st.pyplot(plt)
181
+ st.write("Insight: It can be seen that the role with the most burnout and least normal is Data Scientist. All others seem to be quite balanced.")
182
  elif val == "Productivity Rate By Work Location":
183
  plt.figure(figsize=(12,6))
184
  sns.countplot(data=df, x='Work_Location', hue='Productivity_Change')
185
  plt.title('Rate Of Productivity By Work Location')
186
  st.pyplot(plt)
187
+ st.write("Insight: There does not seem to be any work location that contribute to increase/decrease in productivity change, all seems balanced.")
188
  elif val == "Mental Health Condition By Work Location":
189
  plt.figure(figsize=(12,6))
190
  sns.countplot(x='Work_Location',hue='Mental_Health_Condition',data=df)
191
  plt.title('Mental Health Condition by Work Location')
192
+ st.pyplot(plt)
193
+ st.write("Insight: There does not seem to be any work location that contribute to more burnout or more depression, all seems balanced.")