Spaces:

stanlys96
/

Hacktiv8_Milestone_2

Sleeping

App Files Community

stanlys96 committed on Oct 24, 2024

Commit

26605a1

verified ·

1 Parent(s): f9c43fa

Upload 8 files

Browse files

Files changed (2) hide show

app.py +0 -1
eda.py +9 -27

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import streamlit as st
-import pandas as pd
 import eda
 import prediction
 import home

 import streamlit as st
 import eda
 import prediction
 import home

eda.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
-import numpy as np
 import seaborn as sns
 def map_values(arr):
@@ -125,15 +124,12 @@ def app():
     st.header('Remote Work & Mental Health Prediction Project', divider='rainbow')
     st.subheader('Exploratory Data Analysis')
-    # selection = st.sidebar.selectbox("Go to", list(PAGES.keys()))
-    eda_list = ["Numerical Distributions", "Age Groups Chart", "Job Roles Chart", "Satisfaction With Remote Work Chart", "Stress Level Correlations", "Social Isolation Rating Chart", "Stress Level By Job Role", "Mental Health Condition By Job Role", "Productivity Rate By Work Location", "Mental Health Condition By Work Location"]
     val = st.sidebar.radio("Choose plot to show", eda_list)
     if val == "Numerical Distributions":
         numerical_columns = ['Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
         df[numerical_columns].hist(figsize=(12, 8))
-        # Show the plot
-        plt.tight_layout()  # Adjust layout
         plt.suptitle("Numerical values histogram")
         plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
         st.pyplot(plt)
@@ -145,10 +141,6 @@ def app():
         plt.tight_layout()
         plt.title("Age Group Pie Chart")
         plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
-        # plt.suptitle('Demographics columns distributions', fontsize=16)
-        # Show the charts
         st.pyplot(plt)
         st.write("Insight: The age distribution seems to be quite balanced from the 3 categories, with the most data lies in Mid-career adults (Aged 35-49)")
     elif val == "Job Roles Chart":
@@ -158,25 +150,10 @@ def app():
         plt.tight_layout()
         plt.title("Job Roles Pie Chart")
         plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
-        # plt.suptitle('Demographics columns distributions', fontsize=16)
-        # Show the charts
         st.markdown('<div class="center">', unsafe_allow_html=True)
         st.pyplot(plt)
         st.markdown('</div>', unsafe_allow_html=True)
         st.write("Insight: The job roles distribution seem to be quite balanced from the 7 categories, with the most data being Project Manager with 14.8% of the total")
-    elif val == "Satisfaction With Remote Work Chart":
-        counts = df.groupby(['Access_to_Mental_Health_Resources', 'Satisfaction_with_Remote_Work']).size().unstack()
-        # Plotting
-        plt.figure(figsize=(10, 6))
-        counts.plot(kind='bar', stacked=False, color=['skyblue', 'coral', 'limegreen'])  # Customize colors as desired
-        plt.title('Satisfaction with Remote Work by Access to Mental Health Resources')
-        plt.xlabel('Access to Mental Health Resources')
-        plt.ylabel('Count')
-        plt.xticks(rotation=0)  # Rotate x-axis labels for better readability
-        plt.legend(title='Satisfaction with Remote Work')
-        st.pyplot(plt)
     elif val == "Stress Level Correlations":
         numerical_columns_with_stress_level = ['Stress_Level', 'Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
         correlation_matrix = df[numerical_columns_with_stress_level].corr()
@@ -184,28 +161,33 @@ def app():
         sns.heatmap(correlation_matrix, annot=True, cmap='viridis', linewidths=0.5)  # Replace 'viridis' with your preferred colormap
         plt.title('Correlation Matrix of numerical columns and stress level with Heatmap')
         st.pyplot(plt)
     elif val == "Social Isolation Rating Chart":
-        # Plotting the bar plot for remote work location
         sns.barplot(x='Access_to_Mental_Health_Resources', y='Social_Isolation_Rating', data=df[df["Work_Location"] == "Remote"])
         plt.title('Social Isolation Rating for Remote Workers with/without Access to Mental Health Resources')
         st.pyplot(plt)
     elif val == "Stress Level By Job Role":
         plt.figure(figsize=(12,6))
         sns.countplot(x='Job_Role',hue='Stress_Level',data=df_original)
         plt.title('Stress Level by Job Role')
         st.pyplot(plt)
     elif val == "Mental Health Condition By Job Role":
         plt.figure(figsize=(15,10))
         sns.countplot(x='Job_Role', hue='Mental_Health_Condition', data=df)
         plt.title('Mental Health Condition by Job Role')
         st.pyplot(plt)
     elif val == "Productivity Rate By Work Location":
         plt.figure(figsize=(12,6))
         sns.countplot(data=df, x='Work_Location', hue='Productivity_Change')
         plt.title('Rate Of Productivity By Work Location')
         st.pyplot(plt)
     elif val == "Mental Health Condition By Work Location":
         plt.figure(figsize=(12,6))
         sns.countplot(x='Work_Location',hue='Mental_Health_Condition',data=df)
         plt.title('Mental Health Condition by Work Location')
-        st.pyplot(plt)

 import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
 def map_values(arr):
     st.header('Remote Work & Mental Health Prediction Project', divider='rainbow')
     st.subheader('Exploratory Data Analysis')
+    eda_list = ["Numerical Distributions", "Age Groups Chart", "Job Roles Chart", "Stress Level Correlations", "Social Isolation Rating Chart", "Stress Level By Job Role", "Mental Health Condition By Job Role", "Productivity Rate By Work Location", "Mental Health Condition By Work Location"]
     val = st.sidebar.radio("Choose plot to show", eda_list)
     if val == "Numerical Distributions":
         numerical_columns = ['Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
         df[numerical_columns].hist(figsize=(12, 8))
+        plt.tight_layout()
         plt.suptitle("Numerical values histogram")
         plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
         st.pyplot(plt)
         plt.tight_layout()
         plt.title("Age Group Pie Chart")
         plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
         st.pyplot(plt)
         st.write("Insight: The age distribution seems to be quite balanced from the 3 categories, with the most data lies in Mid-career adults (Aged 35-49)")
     elif val == "Job Roles Chart":
         plt.tight_layout()
         plt.title("Job Roles Pie Chart")
         plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1, wspace=0.2, hspace=0.2)
         st.markdown('<div class="center">', unsafe_allow_html=True)
         st.pyplot(plt)
         st.markdown('</div>', unsafe_allow_html=True)
         st.write("Insight: The job roles distribution seem to be quite balanced from the 7 categories, with the most data being Project Manager with 14.8% of the total")
     elif val == "Stress Level Correlations":
         numerical_columns_with_stress_level = ['Stress_Level', 'Age', 'Years_of_Experience', 'Hours_Worked_Per_Week', 'Number_of_Virtual_Meetings']
         correlation_matrix = df[numerical_columns_with_stress_level].corr()
         sns.heatmap(correlation_matrix, annot=True, cmap='viridis', linewidths=0.5)  # Replace 'viridis' with your preferred colormap
         plt.title('Correlation Matrix of numerical columns and stress level with Heatmap')
         st.pyplot(plt)
+        st.write("Insight: There does not seem to be much correlations between stress level and numerical features")
     elif val == "Social Isolation Rating Chart":
         sns.barplot(x='Access_to_Mental_Health_Resources', y='Social_Isolation_Rating', data=df[df["Work_Location"] == "Remote"])
         plt.title('Social Isolation Rating for Remote Workers with/without Access to Mental Health Resources')
         st.pyplot(plt)
+        st.write("Insight: It appears that employees with access to mental health resources feel slightly socially more isolated")
     elif val == "Stress Level By Job Role":
         plt.figure(figsize=(12,6))
         sns.countplot(x='Job_Role',hue='Stress_Level',data=df_original)
         plt.title('Stress Level by Job Role')
         st.pyplot(plt)
+        st.write("Insight: There does not seem to be any role that's too stressed, or least stressed. Everything seems balanced.")
     elif val == "Mental Health Condition By Job Role":
         plt.figure(figsize=(15,10))
         sns.countplot(x='Job_Role', hue='Mental_Health_Condition', data=df)
         plt.title('Mental Health Condition by Job Role')
         st.pyplot(plt)
+        st.write("Insight: It can be seen that the role with the most burnout and least normal is Data Scientist. All others seem to be quite balanced.")
     elif val == "Productivity Rate By Work Location":
         plt.figure(figsize=(12,6))
         sns.countplot(data=df, x='Work_Location', hue='Productivity_Change')
         plt.title('Rate Of Productivity By Work Location')
         st.pyplot(plt)
+        st.write("Insight: There does not seem to be any work location that contribute to increase/decrease in productivity change, all seems balanced.")
     elif val == "Mental Health Condition By Work Location":
         plt.figure(figsize=(12,6))
         sns.countplot(x='Work_Location',hue='Mental_Health_Condition',data=df)
         plt.title('Mental Health Condition by Work Location')
+        st.pyplot(plt)
+        st.write("Insight: There does not seem to be any work location that contribute to more burnout or more depression, all seems balanced.")