Spaces:
Sleeping
Sleeping
Commit
·
6e83e2f
1
Parent(s):
c7e0507
Add descriptions
Browse files
- app.py +2 -2
- fields/translation_mapping.py +6 -6
- page_attitudes.py +4 -4
- page_demographics.py +13 -1
- page_tests.py +31 -26
app.py
CHANGED
|
@@ -29,12 +29,12 @@ def load_data():
|
|
| 29 |
|
| 30 |
|
| 31 |
# Sidebar navigation using buttons
|
| 32 |
-
st.sidebar.title("
|
| 33 |
if st.sidebar.button("Introduction"):
|
| 34 |
st.session_state['page'] = 'Home'
|
| 35 |
if st.sidebar.button("Demographics"):
|
| 36 |
st.session_state['page'] = 'Demographics'
|
| 37 |
-
if st.sidebar.button("
|
| 38 |
st.session_state['page'] = 'Attitudes'
|
| 39 |
if st.sidebar.button("Personas"):
|
| 40 |
st.session_state['page'] = 'Personas'
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
# Sidebar navigation using buttons
|
| 32 |
+
st.sidebar.title("Taiwanese College Students")
|
| 33 |
if st.sidebar.button("Introduction"):
|
| 34 |
st.session_state['page'] = 'Home'
|
| 35 |
if st.sidebar.button("Demographics"):
|
| 36 |
st.session_state['page'] = 'Demographics'
|
| 37 |
+
if st.sidebar.button("Attitudes"):
|
| 38 |
st.session_state['page'] = 'Attitudes'
|
| 39 |
if st.sidebar.button("Personas"):
|
| 40 |
st.session_state['page'] = 'Personas'
|
fields/translation_mapping.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
translation_mapping = {
|
| 2 |
-
'購物習慣': 'Shopping
|
| 3 |
-
'存錢和投資習慣': 'Saving and Investing
|
| 4 |
-
'台灣的經濟': "
|
| 5 |
-
'AI使用習慣': 'AI Usage
|
| 6 |
-
'台灣的大自然': "
|
| 7 |
-
'環保習慣': 'Environmental
|
| 8 |
'學習環境': 'Learning Environment'
|
| 9 |
}
|
|
|
|
| 1 |
# Lookup table from each Chinese survey category heading to the English
# label shown next to it.
translation_mapping = {
    "購物習慣": "Shopping",
    "存錢和投資習慣": "Saving and Investing",
    "台灣的經濟": "Economy",
    "AI使用習慣": "AI Usage",
    "台灣的大自然": "Nature",
    "環保習慣": "Environmental Protection",
    "學習環境": "Learning Environment",
}
|
page_attitudes.py
CHANGED
|
@@ -9,8 +9,8 @@ from fields.translation_mapping import translation_mapping
|
|
| 9 |
|
| 10 |
@st.cache_data
|
| 11 |
def show(df):
|
| 12 |
-
st.title("
|
| 13 |
-
st.write("
|
| 14 |
|
| 15 |
# Chinese font
|
| 16 |
chinese_font = FontProperties(fname='mingliu.ttf')
|
|
@@ -45,14 +45,14 @@ def show(df):
|
|
| 45 |
for i, field in enumerate(fields):
|
| 46 |
# Create the bar plot
|
| 47 |
sns.countplot(
|
| 48 |
-
x=f"{field} ({field_translation_mapping[category][i]})", data=df_translated, ax=axs[i], palette="
|
| 49 |
|
| 50 |
# Add title and labels
|
| 51 |
title_chinese = field
|
| 52 |
title_english = field_translation_mapping[category][i]
|
| 53 |
axs[i].set_title(
|
| 54 |
f"{title_chinese}\n{title_english}", fontproperties=chinese_font)
|
| 55 |
-
axs[i].set_xlabel('
|
| 56 |
axs[i].set_ylabel('Frequency')
|
| 57 |
|
| 58 |
# Remove any unused subplots
|
|
|
|
| 9 |
|
| 10 |
@st.cache_data
|
| 11 |
def show(df):
|
| 12 |
+
st.title("Student Attitudes (Overall)")
|
| 13 |
+
st.write("Student Attitudes across all Likert fields without clustering")
|
| 14 |
|
| 15 |
# Chinese font
|
| 16 |
chinese_font = FontProperties(fname='mingliu.ttf')
|
|
|
|
| 45 |
for i, field in enumerate(fields):
|
| 46 |
# Create the bar plot
|
| 47 |
sns.countplot(
|
| 48 |
+
x=f"{field} ({field_translation_mapping[category][i]})", data=df_translated, ax=axs[i], palette=sns.color_palette("pastel"), saturation=1)
|
| 49 |
|
| 50 |
# Add title and labels
|
| 51 |
title_chinese = field
|
| 52 |
title_english = field_translation_mapping[category][i]
|
| 53 |
axs[i].set_title(
|
| 54 |
f"{title_chinese}\n{title_english}", fontproperties=chinese_font)
|
| 55 |
+
axs[i].set_xlabel('← Disagreement — Neutral — Agreement →')
|
| 56 |
axs[i].set_ylabel('Frequency')
|
| 57 |
|
| 58 |
# Remove any unused subplots
|
page_demographics.py
CHANGED
|
@@ -4,11 +4,23 @@ import pandas as pd
|
|
| 4 |
|
| 5 |
@st.cache_data
|
| 6 |
def show(df):
|
| 7 |
-
st.title("
|
|
|
|
|
|
|
| 8 |
show_student_counts(df)
|
|
|
|
|
|
|
| 9 |
show_student_age_ranking(df)
|
|
|
|
|
|
|
|
|
|
| 10 |
generate_university_ranking_table(df)
|
|
|
|
|
|
|
|
|
|
| 11 |
show_field_of_study_ranking(df)
|
|
|
|
|
|
|
| 12 |
show_mbti_ranking(df)
|
| 13 |
|
| 14 |
|
|
|
|
| 4 |
|
| 5 |
def _section_heading(title):
    """Render ``title`` as a centred ``<h2>`` heading."""
    # Raw HTML is needed for the centring; ``title`` is only ever one of the
    # fixed literals passed by ``show`` below, never user input.
    st.markdown(
        f"<h2 style='text-align: center;'>{title}</h2>",
        unsafe_allow_html=True,
    )


@st.cache_data
def show(df):
    """Render the "Respondent Demographics" page.

    Emits the page title, then one centred heading per section, each followed
    by the section's renderer. Two sections also carry a one-line caveat
    between the heading and the content.

    Args:
        df: Survey responses; passed unchanged to every section renderer.
    """
    st.title("Respondent Demographics")

    _section_heading("Study Level")
    show_student_counts(df)

    _section_heading("Age Distribution")
    show_student_age_ranking(df)

    _section_heading("University")
    st.text("Universities with the most respondents are those where flyers were distributed.")
    generate_university_ranking_table(df)

    _section_heading("Field of Study")
    st.text("These fields of study have not been normalized.")
    show_field_of_study_ranking(df)

    _section_heading("Personality Type")
    show_mbti_ranking(df)
|
| 25 |
|
| 26 |
|
page_tests.py
CHANGED
|
@@ -3,45 +3,50 @@ import streamlit as st
|
|
| 3 |
import pandas as pd
|
| 4 |
from fields.likert_flat_fields import likert_flat_fields
|
| 5 |
|
| 6 |
-
|
| 7 |
@st.cache_data
|
| 8 |
def show(df):
|
| 9 |
st.title("Statistical Tests")
|
| 10 |
show_chi_square_results(df)
|
| 11 |
|
| 12 |
-
|
| 13 |
def show_chi_square_results(df):
|
| 14 |
-
|
| 15 |
-
# Perform Chi-Square test on each Likert scale field
|
| 16 |
chi_square_results = {}
|
| 17 |
|
| 18 |
for field in likert_flat_fields:
|
| 19 |
observed_values = df[field].value_counts().sort_index()
|
| 20 |
-
|
| 21 |
-
# Explicitly convert to float
|
| 22 |
observed_values = observed_values.astype(float)
|
| 23 |
-
|
| 24 |
-
# Create a list of expected values with the same length as observed_values
|
| 25 |
-
expected_values = [len(df) / len(observed_values)
|
| 26 |
-
] * len(observed_values)
|
| 27 |
-
|
| 28 |
-
# Explicitly convert to float
|
| 29 |
expected_values = [float(x) for x in expected_values]
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
# Perform Chi-Square Test
|
| 32 |
-
chi_stat, p_value = chisquare(
|
| 33 |
-
f_obs=observed_values, f_exp=expected_values)
|
| 34 |
-
|
| 35 |
-
chi_square_results[field] = {
|
| 36 |
-
'Chi-Square Statistic': chi_stat, 'p-value': p_value}
|
| 37 |
-
|
| 38 |
-
# Convert dictionary to DataFrame
|
| 39 |
chi_square_df = pd.DataFrame.from_dict(chi_square_results, orient='index')
|
| 40 |
-
|
| 41 |
-
# Explicitly convert p-values to float and format in scientific notation
|
| 42 |
chi_square_df['p-value'] = chi_square_df['p-value'].astype(float)
|
| 43 |
-
chi_square_df['p-value'] = chi_square_df['p-value'].apply(
|
| 44 |
-
lambda x: "{:.2e}".format(x))
|
| 45 |
|
| 46 |
-
#
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import pandas as pd
|
| 4 |
from fields.likert_flat_fields import likert_flat_fields
|
| 5 |
|
|
|
|
| 6 |
@st.cache_data
def show(df):
    """Render the "Statistical Tests" page.

    Writes the page title and then hands ``df`` to
    ``show_chi_square_results`` for the results table.
    """
    st.title("Statistical Tests")
    show_chi_square_results(df)
|
| 10 |
|
|
|
|
| 11 |
_CHI_SQUARE_COLUMNS = ['Question', 'Chi-Square Statistic', 'p-value']
_HIGHLIGHT_CSS = "background-color: yellow"


def _chi_square_table(df, fields):
    """Test each field's answer counts against a uniform distribution.

    Returns a DataFrame with one row per field and the columns
    'Question', 'Chi-Square Statistic' and 'p-value' (both numeric).
    """
    results = {}
    for field in fields:
        # value_counts() drops missing answers and only lists answer values
        # that actually occur in the column.
        observed = df[field].value_counts().astype(float)
        if observed.empty:
            # No answers at all: nothing to test, report NaN instead of
            # dividing by zero.
            results[field] = (float("nan"), float("nan"))
            continue
        # With f_exp omitted, chisquare assumes equally likely categories and
        # derives the expected counts from the observed total. Building them
        # from len(df) disagrees with that total as soon as a response is
        # missing.
        chi_stat, p_value = chisquare(f_obs=observed.to_numpy())
        results[field] = (float(chi_stat), float(p_value))

    rows = [
        {'Question': field, 'Chi-Square Statistic': stat, 'p-value': p_value}
        for field, (stat, p_value) in results.items()
    ]
    return pd.DataFrame(rows, columns=_CHI_SQUARE_COLUMNS)


def show_chi_square_results(df, chi_square_threshold=300, p_value_threshold=1e-50):
    """Render a highlighted table of chi-square goodness-of-fit results.

    Runs one test per entry of ``likert_flat_fields`` and writes the result
    to the page as an HTML table.

    Args:
        df: Survey responses containing a column for every Likert field.
        chi_square_threshold: Statistics strictly above this are highlighted.
        p_value_threshold: p-values strictly below this are highlighted.
    """
    table = _chi_square_table(df, likert_flat_fields)

    # Styler evaluates its rules lazily at render time, so the data must stay
    # numeric until then. The scientific notation is applied with format(),
    # which only changes the displayed text. Each column gets its own rule so
    # a threshold is never compared against the other column's values.
    styled = (
        table.style
        .apply(
            lambda column: [
                _HIGHLIGHT_CSS if value > chi_square_threshold else ""
                for value in column
            ],
            subset=['Chi-Square Statistic'],
        )
        .apply(
            lambda column: [
                _HIGHLIGHT_CSS if value < p_value_threshold else ""
                for value in column
            ],
            subset=['p-value'],
        )
        .format({'p-value': '{:.2e}'})
    )

    # The table is generated HTML, so it has to be emitted as raw markup.
    st.markdown(styled.to_html(), unsafe_allow_html=True)
|