stanlys96 committed on
Commit
f9c43fa
·
verified ·
1 Parent(s): afacfe1

Upload 8 files

Browse files
Files changed (1) hide show
  1. eda.py +44 -2
eda.py CHANGED
@@ -1,6 +1,12 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
 
 
 
 
 
 
4
 
5
  demographics_cols = ['Age_Group', 'Gender', 'Region']
6
  job_cols = ['Job_Role', 'Industry', 'Years_of_Experience_Group']
@@ -104,7 +110,11 @@ st.markdown(
104
  )
105
 
106
  def app():
107
- df = pd.read_csv('data.csv')
 
 
 
 
108
  df['Age_Group'] = df['Age'].apply(categorize_age)
109
  df['Years_of_Experience_Group'] = df['Years_of_Experience'].apply(categorize_experience)
110
  df['Hours_Worked_Group'] = df['Hours_Worked_Per_Week'].apply(categorize_hours)
@@ -116,7 +126,7 @@ def app():
116
  st.subheader('Exploratory Data Analysis')
117
 
118
  # selection = st.sidebar.selectbox("Go to", list(PAGES.keys()))
119
- eda_list = ["Numerical Distributions", "Age Groups Chart", "Job Roles Chart", "Satisfaction With Remote Work Chart"]
120
  val = st.sidebar.radio("Choose plot to show", eda_list)
121
  if val == "Numerical Distributions":
122
  numerical_columns = ['Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
@@ -166,4 +176,36 @@ def app():
166
  plt.ylabel('Count')
167
  plt.xticks(rotation=0) # Rotate x-axis labels for better readability
168
  plt.legend(title='Satisfaction with Remote Work')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  st.pyplot(plt)
 
1
  import streamlit as st
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
+ import numpy as np
5
+ import seaborn as sns
6
+
7
def map_values(arr):
    """Convert stress-level labels into ordinal integers.

    Args:
        arr: Iterable of labels; each must be 'Low', 'Medium' or 'High'.

    Returns:
        A list with one integer per input label, in input order:
        'Low' -> 0, 'Medium' -> 1, 'High' -> 2.

    Raises:
        KeyError: If a label is not one of the three known values
            (this includes missing values such as NaN).
    """
    mapping = {'Low': 0, 'Medium': 1, 'High': 2}
    return [mapping[value] for value in arr]
10
 
11
  demographics_cols = ['Age_Group', 'Gender', 'Region']
12
  job_cols = ['Job_Role', 'Industry', 'Years_of_Experience_Group']
 
110
  )
111
 
112
  def app():
113
+ df_original = pd.read_csv("data.csv")
114
+ df = df_original.copy()
115
+ df["Mental_Health_Condition"].fillna("Normal", inplace=True)
116
+ df["Physical_Activity"].fillna("None", inplace=True)
117
+ df['Stress_Level'] = map_values(df["Stress_Level"])
118
  df['Age_Group'] = df['Age'].apply(categorize_age)
119
  df['Years_of_Experience_Group'] = df['Years_of_Experience'].apply(categorize_experience)
120
  df['Hours_Worked_Group'] = df['Hours_Worked_Per_Week'].apply(categorize_hours)
 
126
  st.subheader('Exploratory Data Analysis')
127
 
128
  # selection = st.sidebar.selectbox("Go to", list(PAGES.keys()))
129
+ eda_list = ["Numerical Distributions", "Age Groups Chart", "Job Roles Chart", "Satisfaction With Remote Work Chart", "Stress Level Correlations", "Social Isolation Rating Chart", "Stress Level By Job Role", "Mental Health Condition By Job Role", "Productivity Rate By Work Location", "Mental Health Condition By Work Location"]
130
  val = st.sidebar.radio("Choose plot to show", eda_list)
131
  if val == "Numerical Distributions":
132
  numerical_columns = ['Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
 
176
  plt.ylabel('Count')
177
  plt.xticks(rotation=0) # Rotate x-axis labels for better readability
178
  plt.legend(title='Satisfaction with Remote Work')
179
+ st.pyplot(plt)
180
+ elif val == "Stress Level Correlations":
181
+ numerical_columns_with_stress_level = ['Stress_Level', 'Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
182
+ correlation_matrix = df[numerical_columns_with_stress_level].corr()
183
+ plt.figure(figsize=(12, 8))
184
+ sns.heatmap(correlation_matrix, annot=True, cmap='viridis', linewidths=0.5) # Replace 'viridis' with your preferred colormap
185
+ plt.title('Correlation Matrix of numerical columns and stress level with Heatmap')
186
+ st.pyplot(plt)
187
+ elif val == "Social Isolation Rating Chart":
188
+ # Plotting the bar plot for remote work location
189
+ sns.barplot(x='Access_to_Mental_Health_Resources', y='Social_Isolation_Rating', data=df[df["Work_Location"] == "Remote"])
190
+ plt.title('Social Isolation Rating for Remote Workers with/without Access to Mental Health Resources')
191
+ st.pyplot(plt)
192
+ elif val == "Stress Level By Job Role":
193
+ plt.figure(figsize=(12,6))
194
+ sns.countplot(x='Job_Role',hue='Stress_Level',data=df_original)
195
+ plt.title('Stress Level by Job Role')
196
+ st.pyplot(plt)
197
+ elif val == "Mental Health Condition By Job Role":
198
+ plt.figure(figsize=(15,10))
199
+ sns.countplot(x='Job_Role', hue='Mental_Health_Condition', data=df)
200
+ plt.title('Mental Health Condition by Job Role')
201
+ st.pyplot(plt)
202
+ elif val == "Productivity Rate By Work Location":
203
+ plt.figure(figsize=(12,6))
204
+ sns.countplot(data=df, x='Work_Location', hue='Productivity_Change')
205
+ plt.title('Rate Of Productivity By Work Location')
206
+ st.pyplot(plt)
207
+ elif val == "Mental Health Condition By Work Location":
208
+ plt.figure(figsize=(12,6))
209
+ sns.countplot(x='Work_Location',hue='Mental_Health_Condition',data=df)
210
+ plt.title('Mental Health Condition by Work Location')
211
  st.pyplot(plt)