Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
|
@@ -11,13 +12,18 @@ import shap
|
|
| 11 |
from xgboost import XGBClassifier
|
| 12 |
from sklearn.preprocessing import OneHotEncoder
|
| 13 |
from streamlit_shap import st_shap
|
|
|
|
| 14 |
|
| 15 |
-
#
|
|
|
|
|
|
|
|
|
|
| 16 |
@st.cache_resource
|
| 17 |
def load_data():
|
| 18 |
data = pd.read_csv('data_cleaned_new.csv')
|
| 19 |
return data
|
| 20 |
|
|
|
|
| 21 |
@st.cache_resource
|
| 22 |
def load_model_and_encoder():
|
| 23 |
xgb_model = joblib.load('xgb_model.joblib')
|
|
@@ -26,29 +32,35 @@ def load_model_and_encoder():
|
|
| 26 |
|
| 27 |
xgb_model, ohe = load_model_and_encoder()
|
| 28 |
|
| 29 |
-
#
|
| 30 |
explainer = shap.TreeExplainer(xgb_model)
|
| 31 |
|
| 32 |
# Load the data
|
| 33 |
data = load_data()
|
| 34 |
|
| 35 |
-
#
|
| 36 |
-
st.sidebar.title("Explore Financial Insights")
|
| 37 |
option = st.sidebar.radio(
|
| 38 |
-
"Select
|
| 39 |
-
("Home", 'Description of Variables', "Regional-Based Analysis", "Income-Based Analysis", "Gender-Based Analysis", "Financial
|
| 40 |
)
|
| 41 |
|
| 42 |
-
#
|
| 43 |
-
st.sidebar.markdown("### What is Findex?")
|
| 44 |
-
st.sidebar.write("""
|
| 45 |
-
The Global Findex database provides comprehensive data on how adults worldwide save, borrow, make payments, and manage risk.
|
| 46 |
-
Launched with support from the Bill & Melinda Gates Foundation, the database is updated every three years and is the world’s most
|
| 47 |
-
detailed dataset on how adults use formal and informal financial services. It offers insights into the financial behaviors and
|
| 48 |
-
access to financial systems globally.
|
| 49 |
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
# Main section logic
|
| 54 |
if option == "Home":
|
|
@@ -61,7 +73,7 @@ if option == "Home":
|
|
| 61 |
# Round the percentage to 2 decimal places for display
|
| 62 |
economy_df['percentage'] = economy_df['percentage'].round(2)
|
| 63 |
|
| 64 |
-
# Create a choropleth map using Plotly with a green color scheme
|
| 65 |
fig = px.choropleth(
|
| 66 |
economy_df,
|
| 67 |
locations='Country_Economy',
|
|
@@ -79,10 +91,10 @@ if option == "Home":
|
|
| 79 |
hovertext=economy_df['Country_Economy']
|
| 80 |
)
|
| 81 |
|
| 82 |
-
# Add the title to the Plotly chart itself,
|
| 83 |
fig.update_layout(
|
| 84 |
title=dict(
|
| 85 |
-
text="FINDEX 2021 Data
|
| 86 |
font=dict(size=49, color='black', family="Raleway, sans-serif"), # Stylish font and bigger size
|
| 87 |
x=0.5, # Center the title
|
| 88 |
xanchor='center',
|
|
@@ -118,19 +130,28 @@ if option == "Home":
|
|
| 118 |
# Display the Plotly chart first
|
| 119 |
st.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False})
|
| 120 |
|
| 121 |
-
#
|
| 122 |
st.markdown("""
|
| 123 |
-
This application leverages the Global
|
|
|
|
| 124 |
|
| 125 |
-
|
| 126 |
-
- **
|
| 127 |
-
|
| 128 |
-
- **Income-Based Analysis
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
""")
|
| 131 |
|
|
|
|
| 132 |
elif option == "Description of Variables":
|
| 133 |
-
st.markdown("<h2 style='text-align: center;'>
|
| 134 |
st.markdown("""
|
| 135 |
- **Country_Economy**: The name of the country or economy.
|
| 136 |
- **Country_Code**: ISO 3-digit code representing each economy.
|
|
@@ -176,24 +197,22 @@ elif option == "Description of Variables":
|
|
| 176 |
- **Data_Collection_Year**: The year of the data collection.
|
| 177 |
""")
|
| 178 |
|
| 179 |
-
#
|
| 180 |
if option == "Regional-Based Analysis":
|
| 181 |
st.markdown("<h2 style='text-align: center;'>Regional-Based Analysis</h2>", unsafe_allow_html=True)
|
| 182 |
-
st.write("This section allows
|
| 183 |
|
| 184 |
-
#
|
| 185 |
variable_labels = {
|
| 186 |
'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
|
| 187 |
'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
|
| 188 |
'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months',
|
| 189 |
'Paid_Bills_Online': 'Paid Bills Online',
|
| 190 |
-
'Financial_Worry_Old_Age': 'Financial Worry Old Age',
|
| 191 |
'Owns_Mobile_Phone': 'Owns Mobile Phone',
|
| 192 |
-
'Has_Internet_Access': 'Has Internet Access'
|
| 193 |
-
'Made_Digital_Payment': 'Made Digital Payment'
|
| 194 |
}
|
| 195 |
|
| 196 |
-
# List of regions from
|
| 197 |
regions = data['WorldBank_Region'].unique()
|
| 198 |
|
| 199 |
# Multiselect for region selection
|
|
@@ -202,7 +221,7 @@ if option == "Regional-Based Analysis":
|
|
| 202 |
# Filter data based on selected regions
|
| 203 |
regional_data = data[data['WorldBank_Region'].isin(selected_regions)]
|
| 204 |
|
| 205 |
-
# Allow user to choose which variable they want to analyze
|
| 206 |
variable_to_compare = st.selectbox(
|
| 207 |
"Select variable to analyze:",
|
| 208 |
options=list(variable_labels.keys()),
|
|
@@ -219,6 +238,7 @@ if option == "Regional-Based Analysis":
|
|
| 219 |
# Create an interactive Plotly bar chart to compare the regions and education levels
|
| 220 |
fig = px.bar(summary, x='WorldBank_Region', y=f'Average {variable_to_compare}',
|
| 221 |
color='Education_Level',
|
|
|
|
| 222 |
title=f"Comparison of {variable_labels[variable_to_compare]} Across Selected Regions and Education Levels",
|
| 223 |
labels={'WorldBank_Region': 'Region', f'Average {variable_to_compare}': f'Average {variable_labels[variable_to_compare]} (%)'},
|
| 224 |
barmode='group')
|
|
@@ -230,33 +250,31 @@ if option == "Regional-Based Analysis":
|
|
| 230 |
showlegend=True,
|
| 231 |
width=800,
|
| 232 |
height=500,
|
| 233 |
-
margin={"r":0,"t":50,"l":0,"b":50}
|
| 234 |
)
|
| 235 |
|
| 236 |
# Show the chart in Streamlit
|
| 237 |
st.plotly_chart(fig)
|
| 238 |
|
| 239 |
# Summary of the analysis (formatting variable name)
|
| 240 |
-
st.markdown(f"### Summary
|
| 241 |
-
st.write(f"The analysis above shows how {variable_labels[variable_to_compare]} differs across regions and education levels.")
|
| 242 |
st.write("Key takeaways:")
|
| 243 |
for region in selected_regions:
|
| 244 |
region_data = summary[summary['WorldBank_Region'] == region]
|
| 245 |
for educ_level in region_data['Education_Level'].unique():
|
| 246 |
avg_value = region_data[region_data['Education_Level'] == educ_level][f'Average {variable_to_compare}'].values[0]
|
| 247 |
-
st.write(f"- In {region}, individuals with {educ_level} have an average {variable_labels[variable_to_compare].lower()} of {avg_value:.0f}
|
| 248 |
-
|
| 249 |
|
|
|
|
| 250 |
elif option == "Income-Based Analysis":
|
| 251 |
st.markdown("<h2 style='text-align: center;'>Income-Based Analysis</h2>", unsafe_allow_html=True)
|
| 252 |
-
st.write("This section allows
|
| 253 |
|
| 254 |
-
# Create a dictionary mapping original column names to
|
| 255 |
variable_labels_income = {
|
| 256 |
'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
|
| 257 |
'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
|
| 258 |
-
'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months'
|
| 259 |
-
'Made_Digital_Payment': 'Made Digital Payment'
|
| 260 |
}
|
| 261 |
|
| 262 |
# Select Income Quintile
|
|
@@ -296,7 +314,7 @@ elif option == "Income-Based Analysis":
|
|
| 296 |
title=f"{variable_labels_income[indicator]} for Income Quintile {income_quintile}",
|
| 297 |
labels={indicator: variable_labels_income[indicator]},
|
| 298 |
color=indicator,
|
| 299 |
-
color_continuous_scale='
|
| 300 |
)
|
| 301 |
|
| 302 |
st.plotly_chart(fig_income)
|
|
@@ -306,26 +324,25 @@ elif option == "Income-Based Analysis":
|
|
| 306 |
for indicator, percentage in income_summary_dict.items():
|
| 307 |
st.write(f"**{percentage:.1f}% of respondents in Income Quintile {income_quintile} have {variable_labels_income[indicator]}**.")
|
| 308 |
|
| 309 |
-
|
| 310 |
elif option == "Gender-Based Analysis":
|
| 311 |
st.markdown("<h2 style='text-align: center;'>Gender-Based Analysis</h2>", unsafe_allow_html=True)
|
| 312 |
-
st.write("Here
|
| 313 |
|
| 314 |
-
# Create a dictionary mapping original column names to
|
| 315 |
variable_labels_gender = {
|
| 316 |
'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
|
| 317 |
'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
|
| 318 |
-
'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months'
|
| 319 |
-
'Made_Digital_Payment': 'Made Digital Payment'
|
| 320 |
}
|
| 321 |
|
| 322 |
# Gender selection
|
| 323 |
gender = st.radio("Select Gender:", ("Female", "Male"))
|
| 324 |
|
| 325 |
-
# Age group selection
|
| 326 |
age_group = st.selectbox("Select Age Group:", data['age_group'].unique())
|
| 327 |
|
| 328 |
-
# Convert gender to appropriate coding
|
| 329 |
gender_code = 1 if gender == "Female" else 2
|
| 330 |
|
| 331 |
# Filter the data based on gender and age group
|
|
@@ -336,7 +353,7 @@ elif option == "Gender-Based Analysis":
|
|
| 336 |
"Select Financial Indicators to Analyze:",
|
| 337 |
options=list(variable_labels_gender.keys()),
|
| 338 |
format_func=lambda x: variable_labels_gender[x], # Format options without underscores
|
| 339 |
-
default=['Account_At_Financial_Or_Mobile_Money_Provider']
|
| 340 |
)
|
| 341 |
|
| 342 |
st.markdown(f"### Analysis for {gender}s in {age_group} Age Group")
|
|
@@ -362,7 +379,7 @@ elif option == "Gender-Based Analysis":
|
|
| 362 |
title=f"{variable_labels_gender[indicator]} for {gender}s in {age_group} Age Group",
|
| 363 |
labels={indicator: variable_labels_gender[indicator]},
|
| 364 |
color=indicator,
|
| 365 |
-
color_continuous_scale='
|
| 366 |
)
|
| 367 |
|
| 368 |
st.plotly_chart(fig)
|
|
@@ -371,9 +388,14 @@ elif option == "Gender-Based Analysis":
|
|
| 371 |
st.markdown("### Summary")
|
| 372 |
for indicator, percentage in summary_dict.items():
|
| 373 |
st.write(f"**{percentage:.1f}% of {gender}s in the {age_group} age group have {variable_labels_gender[indicator]}**.")
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
|
| 378 |
# Define the enhanced_recommender function inside the elif block
|
| 379 |
def enhanced_recommender(age, income_quintile, has_debit_card, uses_mobile_money, financial_goal, savings_habit, investment_interest):
|
|
@@ -423,7 +445,7 @@ elif option == "Financial Recommender Engine":
|
|
| 423 |
return recommendations
|
| 424 |
|
| 425 |
# Collect user inputs
|
| 426 |
-
age = st.slider("Your Age", 18,
|
| 427 |
income_quintile = st.slider("Income Quintile (1 = Lowest, 5 = Highest)", 1, 5, 3)
|
| 428 |
has_debit_card = st.radio("Do you have a debit card?", ("Yes", "No"))
|
| 429 |
uses_mobile_money = st.radio("Have you used mobile money?", ("Yes", "No"))
|
|
@@ -434,45 +456,71 @@ elif option == "Financial Recommender Engine":
|
|
| 434 |
# Initialize recommendations as an empty list
|
| 435 |
recommendations = []
|
| 436 |
|
| 437 |
-
if st.button("Get
|
| 438 |
# Call the enhanced_recommender function and generate recommendations
|
| 439 |
recommendations = enhanced_recommender(age, income_quintile, has_debit_card, uses_mobile_money, financial_goal, savings_habit, investment_interest)
|
| 440 |
|
| 441 |
# Check if the recommendations were generated successfully and display them
|
| 442 |
if recommendations:
|
| 443 |
-
st.markdown("###
|
| 444 |
for rec in recommendations:
|
| 445 |
st.write(f"- {rec}")
|
| 446 |
else:
|
| 447 |
-
st.write("Click the 'Get
|
| 448 |
|
| 449 |
-
|
| 450 |
-
|
|
|
|
| 451 |
|
| 452 |
-
# Introduction to the SML Classification page
|
| 453 |
st.write("""
|
| 454 |
-
###
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
|
| 461 |
-
|
| 462 |
-
|
|
|
|
| 463 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 464 |
# Collect user inputs
|
|
|
|
| 465 |
place_of_living = st.selectbox('Place of Living', ['Urban Area', 'Rural Area', 'Unknown'])
|
| 466 |
education_level = st.selectbox('Education Level', ['Primary', 'Secondary', 'Tertiary'])
|
| 467 |
age_group = st.selectbox('Age Group', ['Adult', 'Middle Age', 'Older Adult', 'Senior', 'Teen', 'Young Adult'])
|
| 468 |
income_bracket = st.selectbox('Income Bracket', [1, 2, 3, 4, 5])
|
| 469 |
-
|
|
|
|
| 470 |
is_mobileowner = st.checkbox('Is Mobile Owner?')
|
| 471 |
has_internet_access = st.checkbox('Has Internet Access?')
|
| 472 |
-
|
| 473 |
-
high_income_region = st.checkbox('High Income Region?')
|
| 474 |
|
| 475 |
-
# Prepare categorical and
|
| 476 |
cat_features = pd.DataFrame({
|
| 477 |
'Place of living': [place_of_living],
|
| 478 |
'Education Level': [education_level],
|
|
@@ -488,8 +536,9 @@ elif option == "SML Classification":
|
|
| 488 |
'Income Bracket': [income_bracket] # Directly use Income Bracket as numerical
|
| 489 |
})
|
| 490 |
|
|
|
|
| 491 |
cat_encoded = pd.DataFrame(ohe.transform(cat_features),
|
| 492 |
-
|
| 493 |
|
| 494 |
# Combine categorical and numerical features
|
| 495 |
features = pd.concat([num_features, cat_encoded], axis=1)
|
|
@@ -503,15 +552,49 @@ elif option == "SML Classification":
|
|
| 503 |
if predicted_saved >= 0.5:
|
| 504 |
st.success("Saving habit is likely.")
|
| 505 |
else:
|
| 506 |
-
st.error("Saving habit is unlikely.")
|
| 507 |
|
| 508 |
# SHAP explanation
|
| 509 |
st.subheader('Feature Contributions 🤖')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 510 |
shap_values = explainer.shap_values(features)
|
| 511 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Importing the necessary libraries
|
| 2 |
import streamlit as st
|
| 3 |
import pandas as pd
|
| 4 |
import numpy as np
|
|
|
|
| 12 |
from xgboost import XGBClassifier
|
| 13 |
from sklearn.preprocessing import OneHotEncoder
|
| 14 |
from streamlit_shap import st_shap
|
| 15 |
+
from duckduckgo_search import DDGS
|
| 16 |
|
| 17 |
+
# Setting up the page icon
|
| 18 |
+
st.set_page_config(page_icon="📊")
|
| 19 |
+
|
| 20 |
+
# Cache the data to avoid loading it multiple times
|
| 21 |
@st.cache_resource
|
| 22 |
def load_data():
|
| 23 |
data = pd.read_csv('data_cleaned_new.csv')
|
| 24 |
return data
|
| 25 |
|
| 26 |
+
# Cache the model and encoder to avoid loading them multiple times
|
| 27 |
@st.cache_resource
|
| 28 |
def load_model_and_encoder():
|
| 29 |
xgb_model = joblib.load('xgb_model.joblib')
|
|
|
|
| 32 |
|
| 33 |
xgb_model, ohe = load_model_and_encoder()
|
| 34 |
|
| 35 |
+
# SHAP explainer
|
| 36 |
explainer = shap.TreeExplainer(xgb_model)
|
| 37 |
|
| 38 |
# Load the data
|
| 39 |
data = load_data()
|
| 40 |
|
| 41 |
+
# Making and naming the sidebars
|
| 42 |
+
st.sidebar.title("Explore Financial Insights and AI-Powered Tools")
|
| 43 |
option = st.sidebar.radio(
|
| 44 |
+
"Select section:",
|
| 45 |
+
("Home", 'Description of Variables', "Regional-Based Analysis", "Income-Based Analysis", "Gender-Based Analysis", "Financial Advice", "Financial AI Helper", "Predict Financial Savings Behavior")
|
| 46 |
)
|
| 47 |
|
| 48 |
+
# Adding a summary of FINDEX at the bottom of the sidebar with a link
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
st.sidebar.markdown("""
|
| 51 |
+
<div style='margin-bottom: 20px;'>
|
| 52 |
+
<h3 style='margin-bottom: -15px;'>What is FINDEX?</h3>
|
| 53 |
+
<p style='margin-bottom: 20px;'>
|
| 54 |
+
The Global Findex database provides comprehensive data on how adults worldwide save, borrow, make payments, and manage risk.
|
| 55 |
+
Launched with support from the Bill & Melinda Gates Foundation, the database is updated every three years and is the world’s most
|
| 56 |
+
detailed dataset on how adults use formal and informal financial services. It offers insights into the financial behaviors and
|
| 57 |
+
access to financial systems globally.
|
| 58 |
+
</p>
|
| 59 |
+
</div>
|
| 60 |
+
""", unsafe_allow_html=True)
|
| 61 |
+
|
| 62 |
+
st.sidebar.image('Findex.png', use_column_width=True)
|
| 63 |
+
st.sidebar.markdown('For more information, visit:<br>[Global Findex Database](https://globalfindex.worldbank.org/)', unsafe_allow_html=True)
|
| 64 |
|
| 65 |
# Main section logic
|
| 66 |
if option == "Home":
|
|
|
|
| 73 |
# Round the percentage to 2 decimal places for display
|
| 74 |
economy_df['percentage'] = economy_df['percentage'].round(2)
|
| 75 |
|
| 76 |
+
# Create a choropleth map using Plotly with a green color scheme to make it look like the earth
|
| 77 |
fig = px.choropleth(
|
| 78 |
economy_df,
|
| 79 |
locations='Country_Economy',
|
|
|
|
| 91 |
hovertext=economy_df['Country_Economy']
|
| 92 |
)
|
| 93 |
|
| 94 |
+
# Add the title to the Plotly chart itself, which also functions as the headline for the homepage
|
| 95 |
fig.update_layout(
|
| 96 |
title=dict(
|
| 97 |
+
text="FINDEX 2021<br><span style='font-size:24px;'>Data Visualization and AI Driven Financial Recommendations</span>", # Title with subtitle
|
| 98 |
font=dict(size=49, color='black', family="Raleway, sans-serif"), # Stylish font and bigger size
|
| 99 |
x=0.5, # Center the title
|
| 100 |
xanchor='center',
|
|
|
|
| 130 |
# Display the Plotly chart first
|
| 131 |
st.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False})
|
| 132 |
|
| 133 |
+
# Description on the home page
|
| 134 |
st.markdown("""
|
| 135 |
+
This application leverages the Global FINDEX 2021 dataset, with over 140,000 participants, to explore financial inclusion and behavior across various economies worldwide.
|
| 136 |
+
Instantly visualize the percentage of respondents from each region who participate in various financial services and gain insights into financial trends and behaviors.
|
| 137 |
|
| 138 |
+
Features of this application include:
|
| 139 |
+
- **Regional Analysis:** Explore financial trends and behaviors by country and region, identifying disparities in access to financial systems.
|
| 140 |
+
|
| 141 |
+
- **Income-Based Analysis:** Analyze financial behaviors like savings, borrowing, and digital payments across different income levels.
|
| 142 |
+
|
| 143 |
+
- **Gender-Based Analysis:** Compare financial inclusion patterns between genders, looking into variables such as account ownership, borrowing, and savings behavior.
|
| 144 |
+
|
| 145 |
+
- **Financial Advice:** Receive tailored financial advice based on inputs related to the FINDEX dataset, offering insights into financial behaviors and decision-making.
|
| 146 |
+
|
| 147 |
+
- **Financial AI Helper:** Receive personalized financial guidance and recommendations based on individual data inputs, leveraging AI to provide actionable advice.
|
| 148 |
+
|
| 149 |
+
- **Predict Financial Savings Behavior:** Use a Supervised Machine Learning model to predict whether an individual has saved money based on socioeconomic factors, with AI-driven insights explaining the outcome.
|
| 150 |
""")
|
| 151 |
|
| 152 |
+
# Second section for the description of variables
|
| 153 |
elif option == "Description of Variables":
|
| 154 |
+
st.markdown("<h2 style='text-align: center;'>Description of Variables</h2>", unsafe_allow_html=True)
|
| 155 |
st.markdown("""
|
| 156 |
- **Country_Economy**: The name of the country or economy.
|
| 157 |
- **Country_Code**: ISO 3-digit code representing each economy.
|
|
|
|
| 197 |
- **Data_Collection_Year**: The year of the data collection.
|
| 198 |
""")
|
| 199 |
|
| 200 |
+
# Third section for the regional-based analysis
|
| 201 |
if option == "Regional-Based Analysis":
|
| 202 |
st.markdown("<h2 style='text-align: center;'>Regional-Based Analysis</h2>", unsafe_allow_html=True)
|
| 203 |
+
st.write("This section allows for exploration of financial trends and behaviors, including savings, borrowing, and digital payments, across various regions. It's possible to access how financial systems differs between regions and examine disparities in financial inclusion globally.")
|
| 204 |
|
| 205 |
+
# Creating a dictionary mapping to get rid of underscores in the variable names making them more readable
|
| 206 |
variable_labels = {
|
| 207 |
'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
|
| 208 |
'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
|
| 209 |
'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months',
|
| 210 |
'Paid_Bills_Online': 'Paid Bills Online',
|
|
|
|
| 211 |
'Owns_Mobile_Phone': 'Owns Mobile Phone',
|
| 212 |
+
'Has_Internet_Access': 'Has Internet Access'
|
|
|
|
| 213 |
}
|
| 214 |
|
| 215 |
+
# List of regions from the dataset
|
| 216 |
regions = data['WorldBank_Region'].unique()
|
| 217 |
|
| 218 |
# Multiselect for region selection
|
|
|
|
| 221 |
# Filter data based on selected regions
|
| 222 |
regional_data = data[data['WorldBank_Region'].isin(selected_regions)]
|
| 223 |
|
| 224 |
+
# Allow user to choose which variable they want to analyze
|
| 225 |
variable_to_compare = st.selectbox(
|
| 226 |
"Select variable to analyze:",
|
| 227 |
options=list(variable_labels.keys()),
|
|
|
|
| 238 |
# Create an interactive Plotly bar chart to compare the regions and education levels
|
| 239 |
fig = px.bar(summary, x='WorldBank_Region', y=f'Average {variable_to_compare}',
|
| 240 |
color='Education_Level',
|
| 241 |
+
color_continuous_scale='Teal',
|
| 242 |
title=f"Comparison of {variable_labels[variable_to_compare]} Across Selected Regions and Education Levels",
|
| 243 |
labels={'WorldBank_Region': 'Region', f'Average {variable_to_compare}': f'Average {variable_labels[variable_to_compare]} (%)'},
|
| 244 |
barmode='group')
|
|
|
|
| 250 |
showlegend=True,
|
| 251 |
width=800,
|
| 252 |
height=500,
|
| 253 |
+
margin={"r":0,"t":50,"l":0,"b":50}
|
| 254 |
)
|
| 255 |
|
| 256 |
# Show the chart in Streamlit
|
| 257 |
st.plotly_chart(fig)
|
| 258 |
|
| 259 |
# Summary of the analysis (formatting variable name)
|
| 260 |
+
st.markdown(f"### Summary")
|
|
|
|
| 261 |
st.write("Key takeaways:")
|
| 262 |
for region in selected_regions:
|
| 263 |
region_data = summary[summary['WorldBank_Region'] == region]
|
| 264 |
for educ_level in region_data['Education_Level'].unique():
|
| 265 |
avg_value = region_data[region_data['Education_Level'] == educ_level][f'Average {variable_to_compare}'].values[0]
|
| 266 |
+
st.write(f"- **In {region}, individuals with {educ_level} have an average {variable_labels[variable_to_compare].lower()} of {avg_value:.0f}%.**")
|
|
|
|
| 267 |
|
| 268 |
+
# Fourth section for the income-based analysis
|
| 269 |
elif option == "Income-Based Analysis":
|
| 270 |
st.markdown("<h2 style='text-align: center;'>Income-Based Analysis</h2>", unsafe_allow_html=True)
|
| 271 |
+
st.write("This section allows for comparasions of financial behaviors such as having an financial acount aswell as savings and borrowing across different income levels.")
|
| 272 |
|
| 273 |
+
# Create a dictionary mapping original column names to remove underscores and make them more readable
|
| 274 |
variable_labels_income = {
|
| 275 |
'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
|
| 276 |
'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
|
| 277 |
+
'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months'
|
|
|
|
| 278 |
}
|
| 279 |
|
| 280 |
# Select Income Quintile
|
|
|
|
| 314 |
title=f"{variable_labels_income[indicator]} for Income Quintile {income_quintile}",
|
| 315 |
labels={indicator: variable_labels_income[indicator]},
|
| 316 |
color=indicator,
|
| 317 |
+
color_continuous_scale='Teal'
|
| 318 |
)
|
| 319 |
|
| 320 |
st.plotly_chart(fig_income)
|
|
|
|
| 324 |
for indicator, percentage in income_summary_dict.items():
|
| 325 |
st.write(f"**{percentage:.1f}% of respondents in Income Quintile {income_quintile} have {variable_labels_income[indicator]}**.")
|
| 326 |
|
| 327 |
+
# Fifth section for the gender-based analysis
|
| 328 |
elif option == "Gender-Based Analysis":
|
| 329 |
st.markdown("<h2 style='text-align: center;'>Gender-Based Analysis</h2>", unsafe_allow_html=True)
|
| 330 |
+
st.write("Here it's possible to visualize financial behaviors such as savings and borrowing for selected gender and age groups.")
|
| 331 |
|
| 332 |
+
# Create a dictionary mapping original column names to remove underscores and make them more readable
|
| 333 |
variable_labels_gender = {
|
| 334 |
'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
|
| 335 |
'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
|
| 336 |
+
'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months'
|
|
|
|
| 337 |
}
|
| 338 |
|
| 339 |
# Gender selection
|
| 340 |
gender = st.radio("Select Gender:", ("Female", "Male"))
|
| 341 |
|
| 342 |
+
# Age group selection
|
| 343 |
age_group = st.selectbox("Select Age Group:", data['age_group'].unique())
|
| 344 |
|
| 345 |
+
# Convert gender to appropriate coding
|
| 346 |
gender_code = 1 if gender == "Female" else 2
|
| 347 |
|
| 348 |
# Filter the data based on gender and age group
|
|
|
|
| 353 |
"Select Financial Indicators to Analyze:",
|
| 354 |
options=list(variable_labels_gender.keys()),
|
| 355 |
format_func=lambda x: variable_labels_gender[x], # Format options without underscores
|
| 356 |
+
default=['Account_At_Financial_Or_Mobile_Money_Provider'] # Default is financial account ownership
|
| 357 |
)
|
| 358 |
|
| 359 |
st.markdown(f"### Analysis for {gender}s in {age_group} Age Group")
|
|
|
|
| 379 |
title=f"{variable_labels_gender[indicator]} for {gender}s in {age_group} Age Group",
|
| 380 |
labels={indicator: variable_labels_gender[indicator]},
|
| 381 |
color=indicator,
|
| 382 |
+
color_continuous_scale='Teal'
|
| 383 |
)
|
| 384 |
|
| 385 |
st.plotly_chart(fig)
|
|
|
|
| 388 |
st.markdown("### Summary")
|
| 389 |
for indicator, percentage in summary_dict.items():
|
| 390 |
st.write(f"**{percentage:.1f}% of {gender}s in the {age_group} age group have {variable_labels_gender[indicator]}**.")
|
| 391 |
+
|
| 392 |
+
# Sixth section for the financial advice
|
| 393 |
+
elif option == "Financial Advice":
|
| 394 |
+
st.markdown("<h2 style='text-align: center;'>Financial Advice</h2>", unsafe_allow_html=True)
|
| 395 |
+
|
| 396 |
+
st.write("""Based on the information provided, this section offers financial advice to help with financial decisions, derived from the FINDEX dataset. The advice is generated from general trends in financial behavior.
|
| 397 |
+
|
| 398 |
+
For more personalized financial advice tailored to individual circumstances, the AI Financial Helper provides deeper, AI-driven recommendations.""")
|
| 399 |
|
| 400 |
# Define the enhanced_recommender function inside the elif block
|
| 401 |
def enhanced_recommender(age, income_quintile, has_debit_card, uses_mobile_money, financial_goal, savings_habit, investment_interest):
|
|
|
|
| 445 |
return recommendations
|
| 446 |
|
| 447 |
# Collect user inputs
|
| 448 |
+
age = st.slider("Your Age", 18, 100, 30)
|
| 449 |
income_quintile = st.slider("Income Quintile (1 = Lowest, 5 = Highest)", 1, 5, 3)
|
| 450 |
has_debit_card = st.radio("Do you have a debit card?", ("Yes", "No"))
|
| 451 |
uses_mobile_money = st.radio("Have you used mobile money?", ("Yes", "No"))
|
|
|
|
| 456 |
# Initialize recommendations as an empty list
|
| 457 |
recommendations = []
|
| 458 |
|
| 459 |
+
if st.button("Get Advice"):
|
| 460 |
# Call the enhanced_recommender function and generate recommendations
|
| 461 |
recommendations = enhanced_recommender(age, income_quintile, has_debit_card, uses_mobile_money, financial_goal, savings_habit, investment_interest)
|
| 462 |
|
| 463 |
# Check if the recommendations were generated successfully and display them
|
| 464 |
if recommendations:
|
| 465 |
+
st.markdown("### Advice:")
|
| 466 |
for rec in recommendations:
|
| 467 |
st.write(f"- {rec}")
|
| 468 |
else:
|
| 469 |
+
st.write("Click the 'Get Advice' button to receive financial advice based on the FINDEX dataset.")
|
| 470 |
|
| 471 |
+
# Seventh section for the financial AI helper
|
| 472 |
+
elif option == "Financial AI Helper":
|
| 473 |
+
st.markdown("<h2 style='text-align: center;'>Financial AI Helper</h2>", unsafe_allow_html=True)
|
| 474 |
|
|
|
|
| 475 |
st.write("""
|
| 476 |
+
### Personalized Financial Guidance
|
| 477 |
+
Welcome to the **Financial AI Helper!** This smart **AI assistant** is designed to provide you with **personalized financial guidance** based on the information you provide.
|
| 478 |
+
It could either be that you're seeking a further explanation of the advice received in the **Financial Advice section**,
|
| 479 |
+
which is based on the **FINDEX** data variables, or that you're looking for more personalized financial advice from the **AI helper.**
|
| 480 |
+
|
| 481 |
+
Simply enter your information below, and let the **AI** assist you with clear and actionable financial solutions!""")
|
| 482 |
+
|
| 483 |
+
# Container for user input and chat button for the AI assistant
|
| 484 |
+
with st.container():
|
| 485 |
+
user_input = st.text_area("Financial AI Helper", height=200)
|
| 486 |
+
if st.button("Get Financial Advice"):
|
| 487 |
+
results = DDGS().chat(user_input, model='gpt-4o-mini')
|
| 488 |
+
st.write(results)
|
| 489 |
|
| 490 |
+
# Eighth section for the financial savings behavior prediction
|
| 491 |
+
elif option == "Predict Financial Savings Behavior":
|
| 492 |
+
st.markdown("<h2 style='text-align: center;'>Predict Financial Savings Behavior</h2>", unsafe_allow_html=True)
|
| 493 |
|
| 494 |
+
|
| 495 |
+
# Introduction to the Predict Financial Savings Behavior page
|
| 496 |
+
st.write("""
|
| 497 |
+
### Supervised Learning Model with AI-Powered Insights
|
| 498 |
+
This page utilizes a Supervised Machine Learning model to predict whether an individual has saved money in the past year, using data from the FINDEX dataset.
|
| 499 |
+
Based on the provided information, the model analyzes the input and makes a prediction:
|
| 500 |
+
|
| 501 |
+
- **Saving habit is likely:** If the model predicts that the individual is likely to have saved money.
|
| 502 |
+
- **Saving habit is unlikely:** If the model predicts that the individual has not likely saved money.
|
| 503 |
+
|
| 504 |
+
In addition to the prediction, an AI-driven analysis will provide insights and explain the key factors contributing to the outcome.
|
| 505 |
+
This will help clarify how variables such as age, income, employment status, and other socioeconomic factors impact the likelihood of saving.
|
| 506 |
+
Age group, income bracket, and other factors will be used to predict the saving behavior.
|
| 507 |
+
|
| 508 |
+
The ranges for the age group is the following: Teen (15-19)
|
| 509 |
+
Young Adult (20-35), Adult (36-55), Middle Age (36-55), Older Adult (56-69), Senior (56-69), Elderly (70+)""")
|
| 510 |
+
|
| 511 |
# Collect user inputs
|
| 512 |
+
female = st.radio('Gender', ['Female', 'Male'])
|
| 513 |
place_of_living = st.selectbox('Place of Living', ['Urban Area', 'Rural Area', 'Unknown'])
|
| 514 |
education_level = st.selectbox('Education Level', ['Primary', 'Secondary', 'Tertiary'])
|
| 515 |
age_group = st.selectbox('Age Group', ['Adult', 'Middle Age', 'Older Adult', 'Senior', 'Teen', 'Young Adult'])
|
| 516 |
income_bracket = st.selectbox('Income Bracket', [1, 2, 3, 4, 5])
|
| 517 |
+
high_income_region = st.checkbox('High Income Region?')
|
| 518 |
+
employed = st.checkbox('Employed?')
|
| 519 |
is_mobileowner = st.checkbox('Is Mobile Owner?')
|
| 520 |
has_internet_access = st.checkbox('Has Internet Access?')
|
| 521 |
+
|
|
|
|
| 522 |
|
| 523 |
+
# Prepare categorical and numerical/binary features
|
| 524 |
cat_features = pd.DataFrame({
|
| 525 |
'Place of living': [place_of_living],
|
| 526 |
'Education Level': [education_level],
|
|
|
|
| 536 |
'Income Bracket': [income_bracket] # Directly use Income Bracket as numerical
|
| 537 |
})
|
| 538 |
|
| 539 |
+
# One-hot encode categorical features
|
| 540 |
cat_encoded = pd.DataFrame(ohe.transform(cat_features),
|
| 541 |
+
columns=ohe.get_feature_names_out(['Place of living', 'Education Level', 'Age Group']))
|
| 542 |
|
| 543 |
# Combine categorical and numerical features
|
| 544 |
features = pd.concat([num_features, cat_encoded], axis=1)
|
|
|
|
| 552 |
if predicted_saved >= 0.5:
|
| 553 |
st.success("Saving habit is likely.")
|
| 554 |
else:
|
| 555 |
+
st.error("Saving habit is unlikely.")
|
| 556 |
|
| 557 |
# SHAP explanation
|
| 558 |
st.subheader('Feature Contributions 🤖')
|
| 559 |
+
st.write("""- Blue bars push the probability lower
|
| 560 |
+
while the Red bars push the probability higher""")
|
| 561 |
+
|
| 562 |
+
# Ensure SHAP values are handled properly (flatten if multi-dimensional)
|
| 563 |
shap_values = explainer.shap_values(features)
|
| 564 |
+
if isinstance(shap_values, list):
|
| 565 |
+
shap_values = shap_values[0] # For binary classification, select the first set of SHAP values
|
| 566 |
+
|
| 567 |
+
st_shap(shap.force_plot(explainer.expected_value, shap_values, features), height=175, width=1750)
|
| 568 |
+
|
| 569 |
+
# Extract important features based on SHAP values
|
| 570 |
+
shap_df = pd.DataFrame({
|
| 571 |
+
'Feature': features.columns,
|
| 572 |
+
'SHAP Value': shap_values.flatten() # Ensure SHAP values are flattened
|
| 573 |
+
})
|
| 574 |
+
|
| 575 |
+
# Sort by absolute SHAP value to get the most important features
|
| 576 |
+
shap_df['Absolute SHAP Value'] = shap_df['SHAP Value'].abs()
|
| 577 |
+
top_important_features = shap_df.sort_values(by='Absolute SHAP Value', ascending=False).head(4)
|
| 578 |
+
|
| 579 |
+
# Generate summary of the most important features
|
| 580 |
+
important_features_summary = "\n".join(
|
| 581 |
+
[f"- **{row['Feature']}**: {'Positive' if row['SHAP Value'] > 0 else 'Negative'} contribution"
|
| 582 |
+
for _, row in top_important_features.iterrows()]
|
| 583 |
+
)
|
| 584 |
|
| 585 |
+
# Pass the summary to the AI assistant for commentary
|
| 586 |
+
with st.expander("AI's analysis of the results"):
|
| 587 |
+
# Construct the AI input with explicit instructions
|
| 588 |
+
user_input = (
|
| 589 |
+
f"You are an AI financial expert, comment on why the saving habit was deemed likely or unlikely. "
|
| 590 |
+
f"Interpret the features based on these rules: "
|
| 591 |
+
f"1. Blue bars represent negative contributions to saving likelihood, and red bars represent positive contributions. "
|
| 592 |
+
f"2. If 'Employment' is 0, the person is unemployed which is not good for saving money. "
|
| 593 |
+
f"3. If 'Has Internet Access' is 0, the person does not have internet access which indicaites thay maybe the person does not save money as they do not have money for internet. "
|
| 594 |
+
f"4. If 'Is Mobile Owner' is 0, the person does not have mobile access wich indicates that the person does not save money as the person can not afford a phone. "
|
| 595 |
+
f"5. If 'High Income Region' is 0, the person is from a non-high-income region. If the person is from a high income region they a larger change of saving up money "
|
| 596 |
+
f"Here are the key factors and their contributions: {important_features_summary}"
|
| 597 |
+
)
|
| 598 |
+
# Generate AI response
|
| 599 |
+
ai_response = DDGS().chat(user_input, model='gpt-4o-mini')
|
| 600 |
+
st.markdown(ai_response)
|