Spaces:

krishaamer
/

ziran

Sleeping

App Files Files Community

krishaamer committed on Nov 7, 2023

Commit

6e83e2f

1 Parent(s): c7e0507

Add descriptions

Browse files

Files changed (5) hide show

app.py +2 -2
fields/translation_mapping.py +6 -6
page_attitudes.py +4 -4
page_demographics.py +13 -1
page_tests.py +31 -26

app.py CHANGED Viewed

@@ -29,12 +29,12 @@ def load_data():
 # Sidebar navigation using buttons
-st.sidebar.title("Navigation")
 if st.sidebar.button("Introduction"):
     st.session_state['page'] = 'Home'
 if st.sidebar.button("Demographics"):
     st.session_state['page'] = 'Demographics'
-if st.sidebar.button("Student Attitudes"):
     st.session_state['page'] = 'Attitudes'
 if st.sidebar.button("Personas"):
     st.session_state['page'] = 'Personas'

 # Sidebar navigation using buttons
+st.sidebar.title("Taiwanese College Students")
 if st.sidebar.button("Introduction"):
     st.session_state['page'] = 'Home'
 if st.sidebar.button("Demographics"):
     st.session_state['page'] = 'Demographics'
+if st.sidebar.button("Attitudes"):
     st.session_state['page'] = 'Attitudes'
 if st.sidebar.button("Personas"):
     st.session_state['page'] = 'Personas'

fields/translation_mapping.py CHANGED Viewed

@@ -1,9 +1,9 @@
 translation_mapping = {
-    '購物習慣': 'Shopping Habits',
-    '存錢和投資習慣': 'Saving and Investing Habits',
-    '台灣的經濟': "Taiwan's Economy",
-    'AI使用習慣': 'AI Usage Habits',
-    '台灣的大自然': "Taiwan's Natural Environment",
-    '環保習慣': 'Environmental Habits',
     '學習環境': 'Learning Environment'
 }

 translation_mapping = {
+    '購物習慣': 'Shopping',
+    '存錢和投資習慣': 'Saving and Investing',
+    '台灣的經濟': "Economy",
+    'AI使用習慣': 'AI Usage',
+    '台灣的大自然': "Nature",
+    '環保習慣': 'Environmental Protection',
     '學習環境': 'Learning Environment'
 }

page_attitudes.py CHANGED Viewed

@@ -9,8 +9,8 @@ from fields.translation_mapping import translation_mapping
 @st.cache_data
 def show(df):
-    st.title("Students Attitudes (Overall)")
-    st.write("Students Attitudes across all Likert fields without clustering")
     # Chinese font
     chinese_font = FontProperties(fname='mingliu.ttf')
@@ -45,14 +45,14 @@ def show(df):
             for i, field in enumerate(fields):
                 # Create the bar plot
                 sns.countplot(
-                    x=f"{field} ({field_translation_mapping[category][i]})", data=df_translated, ax=axs[i], palette="coolwarm")
                 # Add title and labels
                 title_chinese = field
                 title_english = field_translation_mapping[category][i]
                 axs[i].set_title(
                     f"{title_chinese}\n{title_english}", fontproperties=chinese_font)
-                axs[i].set_xlabel('Likert Scale')
                 axs[i].set_ylabel('Frequency')
             # Remove any unused subplots

 @st.cache_data
 def show(df):
+    st.title("Student Attitudes (Overall)")
+    st.write("Student Attitudes across all Likert fields without clustering")
     # Chinese font
     chinese_font = FontProperties(fname='mingliu.ttf')
             for i, field in enumerate(fields):
                 # Create the bar plot
                 sns.countplot(
+                    x=f"{field} ({field_translation_mapping[category][i]})", data=df_translated, ax=axs[i], palette=sns.color_palette("pastel"), saturation=1)
                 # Add title and labels
                 title_chinese = field
                 title_english = field_translation_mapping[category][i]
                 axs[i].set_title(
                     f"{title_chinese}\n{title_english}", fontproperties=chinese_font)
+                axs[i].set_xlabel('← Disagreement — Neutral — Agreement →')
                 axs[i].set_ylabel('Frequency')
             # Remove any unused subplots

page_demographics.py CHANGED Viewed

@@ -4,11 +4,23 @@ import pandas as pd
 @st.cache_data
 def show(df):
-    st.title("Students Demographics")
     show_student_counts(df)
     show_student_age_ranking(df)
     generate_university_ranking_table(df)
     show_field_of_study_ranking(df)
     show_mbti_ranking(df)

 @st.cache_data
 def show(df):
+    st.title("Respondent Demographics")
+    st.markdown(
+                f"<h2 style='text-align: center;'>Study Level</h2>", unsafe_allow_html=True)
     show_student_counts(df)
+    st.markdown(
+                f"<h2 style='text-align: center;'>Age Distribution</h2>", unsafe_allow_html=True)
     show_student_age_ranking(df)
+    st.markdown(
+                f"<h2 style='text-align: center;'>University</h2>", unsafe_allow_html=True)
+    st.text("Universities with the most respondets are those where flyers were distributed.")
     generate_university_ranking_table(df)
+    st.markdown(
+                f"<h2 style='text-align: center;'>Field of Study</h2>", unsafe_allow_html=True)
+    st.text("These fields of study have not been normalized.")
     show_field_of_study_ranking(df)
+    st.markdown(
+                f"<h2 style='text-align: center;'>Personality Type</h2>", unsafe_allow_html=True)
     show_mbti_ranking(df)

page_tests.py CHANGED Viewed

@@ -3,45 +3,50 @@ import streamlit as st
 import pandas as pd
 from fields.likert_flat_fields import likert_flat_fields
 @st.cache_data
 def show(df):
     st.title("Statistical Tests")
     show_chi_square_results(df)
 def show_chi_square_results(df):
-    # Perform Chi-Square test on each Likert scale field
     chi_square_results = {}
     for field in likert_flat_fields:
         observed_values = df[field].value_counts().sort_index()
-        # Explicitly convert to float
         observed_values = observed_values.astype(float)
-        # Create a list of expected values with the same length as observed_values
-        expected_values = [len(df) / len(observed_values)
-                           ] * len(observed_values)
-        # Explicitly convert to float
         expected_values = [float(x) for x in expected_values]
-        # Perform Chi-Square Test
-        chi_stat, p_value = chisquare(
-            f_obs=observed_values, f_exp=expected_values)
-        chi_square_results[field] = {
-            'Chi-Square Statistic': chi_stat, 'p-value': p_value}
-    # Convert dictionary to DataFrame
     chi_square_df = pd.DataFrame.from_dict(chi_square_results, orient='index')
-    # Explicitly convert p-values to float and format in scientific notation
     chi_square_df['p-value'] = chi_square_df['p-value'].astype(float)
-    chi_square_df['p-value'] = chi_square_df['p-value'].apply(
-        lambda x: "{:.2e}".format(x))
-    # Display the DataFrame as a table in Streamlit
-    st.table(chi_square_df)

 import pandas as pd
 from fields.likert_flat_fields import likert_flat_fields
 @st.cache_data
 def show(df):
     st.title("Statistical Tests")
     show_chi_square_results(df)
 def show_chi_square_results(df):
     chi_square_results = {}
     for field in likert_flat_fields:
         observed_values = df[field].value_counts().sort_index()
         observed_values = observed_values.astype(float)
+        expected_values = [len(df) / len(observed_values)] * len(observed_values)
         expected_values = [float(x) for x in expected_values]
+        chi_stat, p_value = chisquare(f_obs=observed_values, f_exp=expected_values)
+        chi_square_results[field] = {'Chi-Square Statistic': chi_stat, 'p-value': p_value}
     chi_square_df = pd.DataFrame.from_dict(chi_square_results, orient='index')
     chi_square_df['p-value'] = chi_square_df['p-value'].astype(float)
+    # Reset index to add a sequence number
+    chi_square_df.reset_index(inplace=True)
+    chi_square_df.rename(columns={'index': 'Question'}, inplace=True)
+    # Define thresholds for highlighting
+    chi_square_threshold = 300  # example threshold for high Chi-Square value
+    p_value_threshold = 1e-50   # example threshold for very low p-value
+    # Apply the highlighting
+    def highlight(value):
+        if isinstance(value, float) and value > chi_square_threshold:
+            return "background-color: yellow"
+        elif isinstance(value, float) and value < p_value_threshold:
+            return "background-color: yellow"
+        else:
+            return ""
+     # Apply the highlighting to numeric columns only
+    chi_square_df_styled = chi_square_df.style.applymap(highlight, subset=pd.IndexSlice[:, ['Chi-Square Statistic', 'p-value']])
+    # Convert p-values to string after highlighting
+    chi_square_df['p-value'] = chi_square_df['p-value'].apply(lambda x: "{:.2e}".format(x))
+    # Convert styled DataFrame to HTML after p-value formatting
+    chi_square_html = chi_square_df_styled.to_html(escape=False)
+    # Display the HTML with unsafe_allow_html set to True
+    st.markdown(chi_square_html, unsafe_allow_html=True)