Spaces:
Sleeping
Sleeping
Upload 8 files
Browse files
eda.py
CHANGED
|
@@ -1,6 +1,12 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
demographics_cols = ['Age_Group', 'Gender', 'Region']
|
| 6 |
job_cols = ['Job_Role', 'Industry', 'Years_of_Experience_Group']
|
|
@@ -104,7 +110,11 @@ st.markdown(
|
|
| 104 |
)
|
| 105 |
|
| 106 |
def app():
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
df['Age_Group'] = df['Age'].apply(categorize_age)
|
| 109 |
df['Years_of_Experience_Group'] = df['Years_of_Experience'].apply(categorize_experience)
|
| 110 |
df['Hours_Worked_Group'] = df['Hours_Worked_Per_Week'].apply(categorize_hours)
|
|
@@ -116,7 +126,7 @@ def app():
|
|
| 116 |
st.subheader('Exploratory Data Analysis')
|
| 117 |
|
| 118 |
# selection = st.sidebar.selectbox("Go to", list(PAGES.keys()))
|
| 119 |
-
eda_list = ["Numerical Distributions", "Age Groups Chart", "Job Roles Chart", "Satisfaction With Remote Work Chart"]
|
| 120 |
val = st.sidebar.radio("Choose plot to show", eda_list)
|
| 121 |
if val == "Numerical Distributions":
|
| 122 |
numerical_columns = ['Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
|
|
@@ -166,4 +176,36 @@ def app():
|
|
| 166 |
plt.ylabel('Count')
|
| 167 |
plt.xticks(rotation=0) # Rotate x-axis labels for better readability
|
| 168 |
plt.legend(title='Satisfaction with Remote Work')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
st.pyplot(plt)
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import matplotlib.pyplot as plt
|
| 4 |
+
import numpy as np
|
| 5 |
+
import seaborn as sns
|
| 6 |
+
|
| 7 |
+
def map_values(arr):
|
| 8 |
+
mapping = {'Low': 0, 'Medium': 1, 'High': 2}
|
| 9 |
+
return [mapping[value] for value in arr]
|
| 10 |
|
| 11 |
demographics_cols = ['Age_Group', 'Gender', 'Region']
|
| 12 |
job_cols = ['Job_Role', 'Industry', 'Years_of_Experience_Group']
|
|
|
|
| 110 |
)
|
| 111 |
|
| 112 |
def app():
|
| 113 |
+
df_original = pd.read_csv("data.csv")
|
| 114 |
+
df = df_original.copy()
|
| 115 |
+
df["Mental_Health_Condition"].fillna("Normal", inplace=True)
|
| 116 |
+
df["Physical_Activity"].fillna("None", inplace=True)
|
| 117 |
+
df['Stress_Level'] = map_values(df["Stress_Level"])
|
| 118 |
df['Age_Group'] = df['Age'].apply(categorize_age)
|
| 119 |
df['Years_of_Experience_Group'] = df['Years_of_Experience'].apply(categorize_experience)
|
| 120 |
df['Hours_Worked_Group'] = df['Hours_Worked_Per_Week'].apply(categorize_hours)
|
|
|
|
| 126 |
st.subheader('Exploratory Data Analysis')
|
| 127 |
|
| 128 |
# selection = st.sidebar.selectbox("Go to", list(PAGES.keys()))
|
| 129 |
+
eda_list = ["Numerical Distributions", "Age Groups Chart", "Job Roles Chart", "Satisfaction With Remote Work Chart", "Stress Level Correlations", "Social Isolation Rating Chart", "Stress Level By Job Role", "Mental Health Condition By Job Role", "Productivity Rate By Work Location", "Mental Health Condition By Work Location"]
|
| 130 |
val = st.sidebar.radio("Choose plot to show", eda_list)
|
| 131 |
if val == "Numerical Distributions":
|
| 132 |
numerical_columns = ['Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
|
|
|
|
| 176 |
plt.ylabel('Count')
|
| 177 |
plt.xticks(rotation=0) # Rotate x-axis labels for better readability
|
| 178 |
plt.legend(title='Satisfaction with Remote Work')
|
| 179 |
+
st.pyplot(plt)
|
| 180 |
+
elif val == "Stress Level Correlations":
|
| 181 |
+
numerical_columns_with_stress_level = ['Stress_Level', 'Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
|
| 182 |
+
correlation_matrix = df[numerical_columns_with_stress_level].corr()
|
| 183 |
+
plt.figure(figsize=(12, 8))
|
| 184 |
+
sns.heatmap(correlation_matrix, annot=True, cmap='viridis', linewidths=0.5) # Replace 'viridis' with your preferred colormap
|
| 185 |
+
plt.title('Correlation Matrix of numerical columns and stress level with Heatmap')
|
| 186 |
+
st.pyplot(plt)
|
| 187 |
+
elif val == "Social Isolation Rating Chart":
|
| 188 |
+
# Plotting the bar plot for remote work location
|
| 189 |
+
sns.barplot(x='Access_to_Mental_Health_Resources', y='Social_Isolation_Rating', data=df[df["Work_Location"] == "Remote"])
|
| 190 |
+
plt.title('Social Isolation Rating for Remote Workers with/without Access to Mental Health Resources')
|
| 191 |
+
st.pyplot(plt)
|
| 192 |
+
elif val == "Stress Level By Job Role":
|
| 193 |
+
plt.figure(figsize=(12,6))
|
| 194 |
+
sns.countplot(x='Job_Role',hue='Stress_Level',data=df_original)
|
| 195 |
+
plt.title('Stress Level by Job Role')
|
| 196 |
+
st.pyplot(plt)
|
| 197 |
+
elif val == "Mental Health Condition By Job Role":
|
| 198 |
+
plt.figure(figsize=(15,10))
|
| 199 |
+
sns.countplot(x='Job_Role', hue='Mental_Health_Condition', data=df)
|
| 200 |
+
plt.title('Mental Health Condition by Job Role')
|
| 201 |
+
st.pyplot(plt)
|
| 202 |
+
elif val == "Productivity Rate By Work Location":
|
| 203 |
+
plt.figure(figsize=(12,6))
|
| 204 |
+
sns.countplot(data=df, x='Work_Location', hue='Productivity_Change')
|
| 205 |
+
plt.title('Rate Of Productivity By Work Location')
|
| 206 |
+
st.pyplot(plt)
|
| 207 |
+
elif val == "Mental Health Condition By Work Location":
|
| 208 |
+
plt.figure(figsize=(12,6))
|
| 209 |
+
sns.countplot(x='Work_Location',hue='Mental_Health_Condition',data=df)
|
| 210 |
+
plt.title('Mental Health Condition by Work Location')
|
| 211 |
st.pyplot(plt)
|