Bjerring98 commited on
Commit
3320beb
·
verified ·
1 Parent(s): d676f60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -83
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
@@ -11,13 +12,18 @@ import shap
11
  from xgboost import XGBClassifier
12
  from sklearn.preprocessing import OneHotEncoder
13
  from streamlit_shap import st_shap
 
14
 
15
- # Cache the data
 
 
 
16
  @st.cache_resource
17
  def load_data():
18
  data = pd.read_csv('data_cleaned_new.csv')
19
  return data
20
 
 
21
  @st.cache_resource
22
  def load_model_and_encoder():
23
  xgb_model = joblib.load('xgb_model.joblib')
@@ -26,29 +32,35 @@ def load_model_and_encoder():
26
 
27
  xgb_model, ohe = load_model_and_encoder()
28
 
29
- # Create SHAP explainer
30
  explainer = shap.TreeExplainer(xgb_model)
31
 
32
  # Load the data
33
  data = load_data()
34
 
35
- # Sidebars
36
- st.sidebar.title("Explore Financial Insights")
37
  option = st.sidebar.radio(
38
- "Select an analysis section:",
39
- ("Home", 'Description of Variables', "Regional-Based Analysis", "Income-Based Analysis", "Gender-Based Analysis", "Financial Recommender Engine", "SML Classification")
40
  )
41
 
42
- # Add a summary of Findex at the bottom of the sidebar
43
- st.sidebar.markdown("### What is Findex?")
44
- st.sidebar.write("""
45
- The Global Findex database provides comprehensive data on how adults worldwide save, borrow, make payments, and manage risk.
46
- Launched with support from the Bill & Melinda Gates Foundation, the database is updated every three years and is the world’s most
47
- detailed dataset on how adults use formal and informal financial services. It offers insights into the financial behaviors and
48
- access to financial systems globally.
49
 
50
- For more information, visit the [Global Findex website](https://www.worldbank.org/en/publication/globalfindex).
51
- """)
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # Main section logic
54
  if option == "Home":
@@ -61,7 +73,7 @@ if option == "Home":
61
  # Round the percentage to 2 decimal places for display
62
  economy_df['percentage'] = economy_df['percentage'].round(2)
63
 
64
- # Create a choropleth map using Plotly with a green color scheme
65
  fig = px.choropleth(
66
  economy_df,
67
  locations='Country_Economy',
@@ -79,10 +91,10 @@ if option == "Home":
79
  hovertext=economy_df['Country_Economy']
80
  )
81
 
82
- # Add the title to the Plotly chart itself, making it bold and larger
83
  fig.update_layout(
84
  title=dict(
85
- text="FINDEX 2021 Data Visualizer", # Title text
86
  font=dict(size=49, color='black', family="Raleway, sans-serif"), # Stylish font and bigger size
87
  x=0.5, # Center the title
88
  xanchor='center',
@@ -118,19 +130,28 @@ if option == "Home":
118
  # Display the Plotly chart first
119
  st.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False})
120
 
121
- # Now display the Financial Inclusion and Behaviour description
122
  st.markdown("""
123
- This application leverages the Global Findex 2021 dataset with over 140,000 participants to explore financial inclusion and behavior across various economies worldwide.
 
124
 
125
- Key features of this application include:
126
- - **Quick Visualization**: Instantly visualize the percentage of respondents from each country who participate in various financial services.
127
- - **Regional Analysis**: Explore financial trends and behaviors by country and region, identifying disparities in access to financial systems.
128
- - **Income-Based Analysis**: Analyze financial behaviors like savings, borrowing, and digital payments across different income levels.
129
- - **Gender-Based Analysis**: Compare financial inclusion patterns between genders, looking into variables such as account ownership, borrowing, and savings behavior.
 
 
 
 
 
 
 
130
  """)
131
 
 
132
  elif option == "Description of Variables":
133
- st.markdown("<h2 style='text-align: center;'>Descripton of Variables</h2>", unsafe_allow_html=True)
134
  st.markdown("""
135
  - **Country_Economy**: The name of the country or economy.
136
  - **Country_Code**: ISO 3-digit code representing each economy.
@@ -176,24 +197,22 @@ elif option == "Description of Variables":
176
  - **Data_Collection_Year**: The year of the data collection.
177
  """)
178
 
179
- # Main section logic for each page
180
  if option == "Regional-Based Analysis":
181
  st.markdown("<h2 style='text-align: center;'>Regional-Based Analysis</h2>", unsafe_allow_html=True)
182
- st.write("This section allows you to explore financial trends and behaviors, including savings, borrowing, and digital payments, across various regions. You can compare how access to financial systems differs between regions and examine disparities in financial inclusion globally.")
183
 
184
- # Create a dictionary mapping original column names to human-readable labels
185
  variable_labels = {
186
  'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
187
  'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
188
  'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months',
189
  'Paid_Bills_Online': 'Paid Bills Online',
190
- 'Financial_Worry_Old_Age': 'Financial Worry Old Age',
191
  'Owns_Mobile_Phone': 'Owns Mobile Phone',
192
- 'Has_Internet_Access': 'Has Internet Access',
193
- 'Made_Digital_Payment': 'Made Digital Payment'
194
  }
195
 
196
- # List of regions from your dataset (assuming 'WorldBank_Region' column holds this data)
197
  regions = data['WorldBank_Region'].unique()
198
 
199
  # Multiselect for region selection
@@ -202,7 +221,7 @@ if option == "Regional-Based Analysis":
202
  # Filter data based on selected regions
203
  regional_data = data[data['WorldBank_Region'].isin(selected_regions)]
204
 
205
- # Allow user to choose which variable they want to analyze (displayed without underscores)
206
  variable_to_compare = st.selectbox(
207
  "Select variable to analyze:",
208
  options=list(variable_labels.keys()),
@@ -219,6 +238,7 @@ if option == "Regional-Based Analysis":
219
  # Create an interactive Plotly bar chart to compare the regions and education levels
220
  fig = px.bar(summary, x='WorldBank_Region', y=f'Average {variable_to_compare}',
221
  color='Education_Level',
 
222
  title=f"Comparison of {variable_labels[variable_to_compare]} Across Selected Regions and Education Levels",
223
  labels={'WorldBank_Region': 'Region', f'Average {variable_to_compare}': f'Average {variable_labels[variable_to_compare]} (%)'},
224
  barmode='group')
@@ -230,33 +250,31 @@ if option == "Regional-Based Analysis":
230
  showlegend=True,
231
  width=800,
232
  height=500,
233
- margin={"r":0,"t":50,"l":0,"b":50},
234
  )
235
 
236
  # Show the chart in Streamlit
237
  st.plotly_chart(fig)
238
 
239
  # Summary of the analysis (formatting variable name)
240
- st.markdown(f"### Summary of Regional and Educational Insights")
241
- st.write(f"The analysis above shows how {variable_labels[variable_to_compare]} differs across regions and education levels.")
242
  st.write("Key takeaways:")
243
  for region in selected_regions:
244
  region_data = summary[summary['WorldBank_Region'] == region]
245
  for educ_level in region_data['Education_Level'].unique():
246
  avg_value = region_data[region_data['Education_Level'] == educ_level][f'Average {variable_to_compare}'].values[0]
247
- st.write(f"- In {region}, individuals with {educ_level} have an average {variable_labels[variable_to_compare].lower()} of {avg_value:.0f}%.")
248
-
249
 
 
250
  elif option == "Income-Based Analysis":
251
  st.markdown("<h2 style='text-align: center;'>Income-Based Analysis</h2>", unsafe_allow_html=True)
252
- st.write("This section allows you to analyze financial behaviors such as savings, borrowing, and digital payments across different income levels.")
253
 
254
- # Create a dictionary mapping original column names to human-readable labels
255
  variable_labels_income = {
256
  'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
257
  'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
258
- 'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months',
259
- 'Made_Digital_Payment': 'Made Digital Payment'
260
  }
261
 
262
  # Select Income Quintile
@@ -296,7 +314,7 @@ elif option == "Income-Based Analysis":
296
  title=f"{variable_labels_income[indicator]} for Income Quintile {income_quintile}",
297
  labels={indicator: variable_labels_income[indicator]},
298
  color=indicator,
299
- color_continuous_scale='Blues'
300
  )
301
 
302
  st.plotly_chart(fig_income)
@@ -306,26 +324,25 @@ elif option == "Income-Based Analysis":
306
  for indicator, percentage in income_summary_dict.items():
307
  st.write(f"**{percentage:.1f}% of respondents in Income Quintile {income_quintile} have {variable_labels_income[indicator]}**.")
308
 
309
-
310
  elif option == "Gender-Based Analysis":
311
  st.markdown("<h2 style='text-align: center;'>Gender-Based Analysis</h2>", unsafe_allow_html=True)
312
- st.write("Here you can analyze financial behaviors such as savings, borrowing, and digital payments for selected gender and age groups.")
313
 
314
- # Create a dictionary mapping original column names to human-readable labels
315
  variable_labels_gender = {
316
  'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
317
  'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
318
- 'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months',
319
- 'Made_Digital_Payment': 'Made Digital Payment'
320
  }
321
 
322
  # Gender selection
323
  gender = st.radio("Select Gender:", ("Female", "Male"))
324
 
325
- # Age group selection (assuming 'age_group' column is already in the dataset)
326
  age_group = st.selectbox("Select Age Group:", data['age_group'].unique())
327
 
328
- # Convert gender to appropriate coding (assuming female=1, male=2 in the dataset)
329
  gender_code = 1 if gender == "Female" else 2
330
 
331
  # Filter the data based on gender and age group
@@ -336,7 +353,7 @@ elif option == "Gender-Based Analysis":
336
  "Select Financial Indicators to Analyze:",
337
  options=list(variable_labels_gender.keys()),
338
  format_func=lambda x: variable_labels_gender[x], # Format options without underscores
339
- default=['Account_At_Financial_Or_Mobile_Money_Provider']
340
  )
341
 
342
  st.markdown(f"### Analysis for {gender}s in {age_group} Age Group")
@@ -362,7 +379,7 @@ elif option == "Gender-Based Analysis":
362
  title=f"{variable_labels_gender[indicator]} for {gender}s in {age_group} Age Group",
363
  labels={indicator: variable_labels_gender[indicator]},
364
  color=indicator,
365
- color_continuous_scale='Viridis'
366
  )
367
 
368
  st.plotly_chart(fig)
@@ -371,9 +388,14 @@ elif option == "Gender-Based Analysis":
371
  st.markdown("### Summary")
372
  for indicator, percentage in summary_dict.items():
373
  st.write(f"**{percentage:.1f}% of {gender}s in the {age_group} age group have {variable_labels_gender[indicator]}**.")
374
-
375
- elif option == "Financial Recommender Engine":
376
- st.markdown("<h2 style='text-align: center;'>Financial Recommender Engine</h2>", unsafe_allow_html=True)
 
 
 
 
 
377
 
378
  # Define the enhanced_recommender function inside the elif block
379
  def enhanced_recommender(age, income_quintile, has_debit_card, uses_mobile_money, financial_goal, savings_habit, investment_interest):
@@ -423,7 +445,7 @@ elif option == "Financial Recommender Engine":
423
  return recommendations
424
 
425
  # Collect user inputs
426
- age = st.slider("Your Age", 18, 70, 30)
427
  income_quintile = st.slider("Income Quintile (1 = Lowest, 5 = Highest)", 1, 5, 3)
428
  has_debit_card = st.radio("Do you have a debit card?", ("Yes", "No"))
429
  uses_mobile_money = st.radio("Have you used mobile money?", ("Yes", "No"))
@@ -434,45 +456,71 @@ elif option == "Financial Recommender Engine":
434
  # Initialize recommendations as an empty list
435
  recommendations = []
436
 
437
- if st.button("Get Recommendations"):
438
  # Call the enhanced_recommender function and generate recommendations
439
  recommendations = enhanced_recommender(age, income_quintile, has_debit_card, uses_mobile_money, financial_goal, savings_habit, investment_interest)
440
 
441
  # Check if the recommendations were generated successfully and display them
442
  if recommendations:
443
- st.markdown("### Recommendations:")
444
  for rec in recommendations:
445
  st.write(f"- {rec}")
446
  else:
447
- st.write("Click the 'Get Recommendations' button to receive personalized financial recommendations.")
448
 
449
- elif option == "SML Classification":
450
- st.markdown("<h2 style='text-align: center;'>SML Classification</h2>", unsafe_allow_html=True)
 
451
 
452
- # Introduction to the SML Classification page
453
  st.write("""
454
- ### Predicting Savings Behavior
455
- This page uses a **Supervised Machine Learning** model to predict whether a person has saved money in the past year.
456
- Based on the information you provide, the model will analyze your input and predict:
457
-
458
- - **Saving habit is likely**: If the model predicts that you're likely to have saved money.
459
- - **Saving habit is unlikely**: If the model predicts that you haven't likely saved money.
 
 
 
 
 
 
 
460
 
461
- The prediction is based on variables such as your age, income, employment status, and other socioeconomic factors.
462
- """)
 
463
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  # Collect user inputs
 
465
  place_of_living = st.selectbox('Place of Living', ['Urban Area', 'Rural Area', 'Unknown'])
466
  education_level = st.selectbox('Education Level', ['Primary', 'Secondary', 'Tertiary'])
467
  age_group = st.selectbox('Age Group', ['Adult', 'Middle Age', 'Older Adult', 'Senior', 'Teen', 'Young Adult'])
468
  income_bracket = st.selectbox('Income Bracket', [1, 2, 3, 4, 5])
469
- female = st.radio('Gender', ['Female', 'Male'])
 
470
  is_mobileowner = st.checkbox('Is Mobile Owner?')
471
  has_internet_access = st.checkbox('Has Internet Access?')
472
- employed = st.checkbox('Employed?')
473
- high_income_region = st.checkbox('High Income Region?')
474
 
475
- # Prepare categorical and numerical features
476
  cat_features = pd.DataFrame({
477
  'Place of living': [place_of_living],
478
  'Education Level': [education_level],
@@ -488,8 +536,9 @@ elif option == "SML Classification":
488
  'Income Bracket': [income_bracket] # Directly use Income Bracket as numerical
489
  })
490
 
 
491
  cat_encoded = pd.DataFrame(ohe.transform(cat_features),
492
- columns=ohe.get_feature_names_out(['Place of living', 'Education Level', 'Age Group']))
493
 
494
  # Combine categorical and numerical features
495
  features = pd.concat([num_features, cat_encoded], axis=1)
@@ -503,15 +552,49 @@ elif option == "SML Classification":
503
  if predicted_saved >= 0.5:
504
  st.success("Saving habit is likely.")
505
  else:
506
- st.error("Saving habit is unlikely.")
507
 
508
  # SHAP explanation
509
  st.subheader('Feature Contributions 🤖')
 
 
 
 
510
  shap_values = explainer.shap_values(features)
511
- st_shap(shap.force_plot(explainer.expected_value, shap_values, features), height=400, width=600)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
 
513
- st.markdown("""
514
- This plot shows how each feature contributes to the predicted likelihood of saving:
515
- - Blue bars push the probability lower
516
- - Red bars push the probability higher
517
- """)
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Importing the necessary libraries
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
 
12
  from xgboost import XGBClassifier
13
  from sklearn.preprocessing import OneHotEncoder
14
  from streamlit_shap import st_shap
15
+ from duckduckgo_search import DDGS
16
 
17
+ # Setting up the page icon
18
+ st.set_page_config(page_icon="📊")
19
+
20
+ # Cache the data to avoid loading it multiple times
21
  @st.cache_resource
22
  def load_data():
23
  data = pd.read_csv('data_cleaned_new.csv')
24
  return data
25
 
26
+ # Cache the model and encoder to avoid loading them multiple times
27
  @st.cache_resource
28
  def load_model_and_encoder():
29
  xgb_model = joblib.load('xgb_model.joblib')
 
32
 
33
  xgb_model, ohe = load_model_and_encoder()
34
 
35
+ # SHAP explainer
36
  explainer = shap.TreeExplainer(xgb_model)
37
 
38
  # Load the data
39
  data = load_data()
40
 
41
+ # Making and naming the sidebars
42
+ st.sidebar.title("Explore Financial Insights and AI-Powered Tools")
43
  option = st.sidebar.radio(
44
+ "Select section:",
45
+ ("Home", 'Description of Variables', "Regional-Based Analysis", "Income-Based Analysis", "Gender-Based Analysis", "Financial Advice", "Financial AI Helper", "Predict Financial Savings Behavior")
46
  )
47
 
48
+ # Adding a summary of FINDEX at the bottom of the sidebar, with a link
 
 
 
 
 
 
49
 
50
+ st.sidebar.markdown("""
51
+ <div style='margin-bottom: 20px;'>
52
+ <h3 style='margin-bottom: -15px;'>What is FINDEX?</h3>
53
+ <p style='margin-bottom: 20px;'>
54
+ The Global Findex database provides comprehensive data on how adults worldwide save, borrow, make payments, and manage risk.
55
+ Launched with support from the Bill & Melinda Gates Foundation, the database is updated every three years and is the world’s most
56
+ detailed dataset on how adults use formal and informal financial services. It offers insights into the financial behaviors and
57
+ access to financial systems globally.
58
+ </p>
59
+ </div>
60
+ """, unsafe_allow_html=True)
61
+
62
+ st.sidebar.image('Findex.png', use_column_width=True)
63
+ st.sidebar.markdown('For more information, visit:<br>[Global Findex Database](https://globalfindex.worldbank.org/)', unsafe_allow_html=True)
64
 
65
  # Main section logic
66
  if option == "Home":
 
73
  # Round the percentage to 2 decimal places for display
74
  economy_df['percentage'] = economy_df['percentage'].round(2)
75
 
76
+ # Create a choropleth map using Plotly with a green color scheme to make it look like the earth
77
  fig = px.choropleth(
78
  economy_df,
79
  locations='Country_Economy',
 
91
  hovertext=economy_df['Country_Economy']
92
  )
93
 
94
+ # Add the title to the Plotly chart itself, which also functions as the headline for the homepage
95
  fig.update_layout(
96
  title=dict(
97
+ text="FINDEX 2021<br><span style='font-size:24px;'>Data Visualization and AI Driven Financial Recommendations</span>", # Title with subtitle
98
  font=dict(size=49, color='black', family="Raleway, sans-serif"), # Stylish font and bigger size
99
  x=0.5, # Center the title
100
  xanchor='center',
 
130
  # Display the Plotly chart first
131
  st.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False})
132
 
133
+ # Description on home page
134
  st.markdown("""
135
+ This application leverages the Global FINDEX 2021 dataset, with over 140,000 participants, to explore financial inclusion and behavior across various economies worldwide.
136
+ Instantly visualize the percentage of respondents from each region who participate in various financial services and gain insights into financial trends and behaviors.
137
 
138
+ Features of this application include:
139
+ - **Regional Analysis:** Explore financial trends and behaviors by country and region, identifying disparities in access to financial systems.
140
+
141
+ - **Income-Based Analysis:** Analyze financial behaviors like savings, borrowing, and digital payments across different income levels.
142
+
143
+ - **Gender-Based Analysis:** Compare financial inclusion patterns between genders, looking into variables such as account ownership, borrowing, and savings behavior.
144
+
145
+ - **Financial Advice:** Receive tailored financial advice based on inputs related to the FINDEX dataset, offering insights into financial behaviors and decision-making.
146
+
147
+ - **Financial AI Helper:** Receive personalized financial guidance and recommendations based on individual data inputs, leveraging AI to provide actionable advice.
148
+
149
+ - **Predict Financial Savings Behavior:** Use a Supervised Machine Learning model to predict whether an individual has saved money based on socioeconomic factors, with AI-driven insights explaining the outcome.
150
  """)
151
 
152
+ # Second section for the description of variables
153
  elif option == "Description of Variables":
154
+ st.markdown("<h2 style='text-align: center;'>Description of Variables</h2>", unsafe_allow_html=True)
155
  st.markdown("""
156
  - **Country_Economy**: The name of the country or economy.
157
  - **Country_Code**: ISO 3-digit code representing each economy.
 
197
  - **Data_Collection_Year**: The year of the data collection.
198
  """)
199
 
200
+ # Third section for the regional-based analysis
201
  if option == "Regional-Based Analysis":
202
  st.markdown("<h2 style='text-align: center;'>Regional-Based Analysis</h2>", unsafe_allow_html=True)
203
+ st.write("This section allows for exploration of financial trends and behaviors, including savings, borrowing, and digital payments, across various regions. It's possible to access how financial systems differs between regions and examine disparities in financial inclusion globally.")
204
 
205
+ # Creating a dictionary mapping to get rid of underscores in the variable names making them more readable
206
  variable_labels = {
207
  'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
208
  'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
209
  'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months',
210
  'Paid_Bills_Online': 'Paid Bills Online',
 
211
  'Owns_Mobile_Phone': 'Owns Mobile Phone',
212
+ 'Has_Internet_Access': 'Has Internet Access'
 
213
  }
214
 
215
+ # List of regions from the dataset
216
  regions = data['WorldBank_Region'].unique()
217
 
218
  # Multiselect for region selection
 
221
  # Filter data based on selected regions
222
  regional_data = data[data['WorldBank_Region'].isin(selected_regions)]
223
 
224
+ # Allow user to choose which variable they want to analyze
225
  variable_to_compare = st.selectbox(
226
  "Select variable to analyze:",
227
  options=list(variable_labels.keys()),
 
238
  # Create an interactive Plotly bar chart to compare the regions and education levels
239
  fig = px.bar(summary, x='WorldBank_Region', y=f'Average {variable_to_compare}',
240
  color='Education_Level',
241
+ color_continuous_scale='Teal',
242
  title=f"Comparison of {variable_labels[variable_to_compare]} Across Selected Regions and Education Levels",
243
  labels={'WorldBank_Region': 'Region', f'Average {variable_to_compare}': f'Average {variable_labels[variable_to_compare]} (%)'},
244
  barmode='group')
 
250
  showlegend=True,
251
  width=800,
252
  height=500,
253
+ margin={"r":0,"t":50,"l":0,"b":50}
254
  )
255
 
256
  # Show the chart in Streamlit
257
  st.plotly_chart(fig)
258
 
259
  # Summary of the analysis (formatting variable name)
260
+ st.markdown(f"### Summary")
 
261
  st.write("Key takeaways:")
262
  for region in selected_regions:
263
  region_data = summary[summary['WorldBank_Region'] == region]
264
  for educ_level in region_data['Education_Level'].unique():
265
  avg_value = region_data[region_data['Education_Level'] == educ_level][f'Average {variable_to_compare}'].values[0]
266
+ st.write(f"- **In {region}, individuals with {educ_level} have an average {variable_labels[variable_to_compare].lower()} of {avg_value:.0f}%.**")
 
267
 
268
+ # Fourth section for the income-based analysis
269
  elif option == "Income-Based Analysis":
270
  st.markdown("<h2 style='text-align: center;'>Income-Based Analysis</h2>", unsafe_allow_html=True)
271
+ st.write("This section allows for comparasions of financial behaviors such as having an financial acount aswell as savings and borrowing across different income levels.")
272
 
273
+ # Create a dictionary mapping original column names to remove underscores and make them more readable
274
  variable_labels_income = {
275
  'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
276
  'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
277
+ 'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months'
 
278
  }
279
 
280
  # Select Income Quintile
 
314
  title=f"{variable_labels_income[indicator]} for Income Quintile {income_quintile}",
315
  labels={indicator: variable_labels_income[indicator]},
316
  color=indicator,
317
+ color_continuous_scale='Teal'
318
  )
319
 
320
  st.plotly_chart(fig_income)
 
324
  for indicator, percentage in income_summary_dict.items():
325
  st.write(f"**{percentage:.1f}% of respondents in Income Quintile {income_quintile} have {variable_labels_income[indicator]}**.")
326
 
327
+ # Fifth section
328
  elif option == "Gender-Based Analysis":
329
  st.markdown("<h2 style='text-align: center;'>Gender-Based Analysis</h2>", unsafe_allow_html=True)
330
+ st.write("Here it's possible to visualize financial behaviors such as savings and borrowing for selected gender and age groups.")
331
 
332
+ # Create a dictionary mapping original column names to remove underscores and make them more readable
333
  variable_labels_gender = {
334
  'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
335
  'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
336
+ 'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months'
 
337
  }
338
 
339
  # Gender selection
340
  gender = st.radio("Select Gender:", ("Female", "Male"))
341
 
342
+ # Age group selection
343
  age_group = st.selectbox("Select Age Group:", data['age_group'].unique())
344
 
345
+ # Convert gender to appropriate coding
346
  gender_code = 1 if gender == "Female" else 2
347
 
348
  # Filter the data based on gender and age group
 
353
  "Select Financial Indicators to Analyze:",
354
  options=list(variable_labels_gender.keys()),
355
  format_func=lambda x: variable_labels_gender[x], # Format options without underscores
356
+ default=['Account_At_Financial_Or_Mobile_Money_Provider'] # Default is financial account ownership
357
  )
358
 
359
  st.markdown(f"### Analysis for {gender}s in {age_group} Age Group")
 
379
  title=f"{variable_labels_gender[indicator]} for {gender}s in {age_group} Age Group",
380
  labels={indicator: variable_labels_gender[indicator]},
381
  color=indicator,
382
+ color_continuous_scale='Teal'
383
  )
384
 
385
  st.plotly_chart(fig)
 
388
  st.markdown("### Summary")
389
  for indicator, percentage in summary_dict.items():
390
  st.write(f"**{percentage:.1f}% of {gender}s in the {age_group} age group have {variable_labels_gender[indicator]}**.")
391
+
392
+ # Sixth section for the financial advice
393
+ elif option == "Financial Advice":
394
+ st.markdown("<h2 style='text-align: center;'>Financial Advice</h2>", unsafe_allow_html=True)
395
+
396
+ st.write("""Based on the information provided, this section offers financial advice to help with financial decisions, derived from the FINDEX dataset. The advice is generated from general trends in financial behavior.
397
+
398
+ For more personalized financial advice tailored to individual circumstances, the AI Financial Helper provides deeper, AI-driven recommendations.""")
399
 
400
  # Define the enhanced_recommender function inside the elif block
401
  def enhanced_recommender(age, income_quintile, has_debit_card, uses_mobile_money, financial_goal, savings_habit, investment_interest):
 
445
  return recommendations
446
 
447
  # Collect user inputs
448
+ age = st.slider("Your Age", 18, 100, 30)
449
  income_quintile = st.slider("Income Quintile (1 = Lowest, 5 = Highest)", 1, 5, 3)
450
  has_debit_card = st.radio("Do you have a debit card?", ("Yes", "No"))
451
  uses_mobile_money = st.radio("Have you used mobile money?", ("Yes", "No"))
 
456
  # Initialize recommendations as an empty list
457
  recommendations = []
458
 
459
+ if st.button("Get Advice"):
460
  # Call the enhanced_recommender function and generate recommendations
461
  recommendations = enhanced_recommender(age, income_quintile, has_debit_card, uses_mobile_money, financial_goal, savings_habit, investment_interest)
462
 
463
  # Check if the recommendations were generated successfully and display them
464
  if recommendations:
465
+ st.markdown("### Advice:")
466
  for rec in recommendations:
467
  st.write(f"- {rec}")
468
  else:
469
+ st.write("Click the 'Get Advice' button to receive financial advice based on the FINDEX dataset.")
470
 
471
+ # Seventh section for the financial AI helper
472
+ elif option == "Financial AI Helper":
473
+ st.markdown("<h2 style='text-align: center;'>Financial AI Helper</h2>", unsafe_allow_html=True)
474
 
 
475
  st.write("""
476
+ ### Personalized Financial Guidance
477
+ Welcome to the **Financial AI Helper!** This smart **AI assistant** is designed to provide you with **personalized financial guidance** based on the information you provide.
478
+ It could either be that you're seeking a further explanation of the advice received in the **Financial Advice section**,
479
+ which is based on the **FINDEX** data variables, or that you're looking for more personalized financial advice from the **AI helper.**
480
+
481
+ Simply enter your information below, and let the **AI** assist you with clear and actionable financial solutions!""")
482
+
483
+ # Container for user input and chat button for the AI assistant
484
+ with st.container():
485
+ user_input = st.text_area("Financial AI Helper", height=200)
486
+ if st.button("Get Financial Advice"):
487
+ results = DDGS().chat(user_input, model='gpt-4o-mini')
488
+ st.write(results)
489
 
490
+ # Eighth section for the financial savings behavior prediction
491
+ elif option == "Predict Financial Savings Behavior":
492
+ st.markdown("<h2 style='text-align: center;'>Predict Financial Savings Behavior</h2>", unsafe_allow_html=True)
493
 
494
+
495
+ # Introduction to the Predict Financial Savings Behavior page
496
+ st.write("""
497
+ ### Supervised Learning Model with AI-Powered Insights
498
+ This page utilizes a Supervised Machine Learning model to predict whether an individual has saved money in the past year, using data from the FINDEX dataset.
499
+ Based on the provided information, the model analyzes the input and makes a prediction:
500
+
501
+ - **Saving habit is likely:** If the model predicts that the individual is likely to have saved money.
502
+ - **Saving habit is unlikely:** If the model predicts that the individual has not likely saved money.
503
+
504
+ In addition to the prediction, an AI-driven analysis will provide insights and explain the key factors contributing to the outcome.
505
+ This will help clarify how variables such as age, income, employment status, and other socioeconomic factors impact the likelihood of saving.
506
+ Age group, income bracket, and other factors will be used to predict the saving behavior.
507
+
508
+ The ranges for the age group is the following: Teen (15-19)
509
+ Young Adult (20-35), Adult (36-55), Middle Age (36-55), Older Adult (56-69), Senior (56-69), Elderly (70+)""")
510
+
511
  # Collect user inputs
512
+ female = st.radio('Gender', ['Female', 'Male'])
513
  place_of_living = st.selectbox('Place of Living', ['Urban Area', 'Rural Area', 'Unknown'])
514
  education_level = st.selectbox('Education Level', ['Primary', 'Secondary', 'Tertiary'])
515
  age_group = st.selectbox('Age Group', ['Adult', 'Middle Age', 'Older Adult', 'Senior', 'Teen', 'Young Adult'])
516
  income_bracket = st.selectbox('Income Bracket', [1, 2, 3, 4, 5])
517
+ high_income_region = st.checkbox('High Income Region?')
518
+ employed = st.checkbox('Employed?')
519
  is_mobileowner = st.checkbox('Is Mobile Owner?')
520
  has_internet_access = st.checkbox('Has Internet Access?')
521
+
 
522
 
523
+ # Prepare categorical and numerical/binary features
524
  cat_features = pd.DataFrame({
525
  'Place of living': [place_of_living],
526
  'Education Level': [education_level],
 
536
  'Income Bracket': [income_bracket] # Directly use Income Bracket as numerical
537
  })
538
 
539
+ # One-hot encode categorical features
540
  cat_encoded = pd.DataFrame(ohe.transform(cat_features),
541
+ columns=ohe.get_feature_names_out(['Place of living', 'Education Level', 'Age Group']))
542
 
543
  # Combine categorical and numerical features
544
  features = pd.concat([num_features, cat_encoded], axis=1)
 
552
  if predicted_saved >= 0.5:
553
  st.success("Saving habit is likely.")
554
  else:
555
+ st.error("Saving habit is unlikely.")
556
 
557
  # SHAP explanation
558
  st.subheader('Feature Contributions 🤖')
559
+ st.write("""- Blue bars push the probability lower
560
+ while the Red bars push the probability higher""")
561
+
562
+ # Ensure SHAP values are handled properly (flatten if multi-dimensional)
563
  shap_values = explainer.shap_values(features)
564
+ if isinstance(shap_values, list):
565
+ shap_values = shap_values[0] # For binary classification, select the first set of SHAP values
566
+
567
+ st_shap(shap.force_plot(explainer.expected_value, shap_values, features), height=175, width=1750)
568
+
569
+ # Extract important features based on SHAP values
570
+ shap_df = pd.DataFrame({
571
+ 'Feature': features.columns,
572
+ 'SHAP Value': shap_values.flatten() # Ensure SHAP values are flattened
573
+ })
574
+
575
+ # Sort by absolute SHAP value to get the most important features
576
+ shap_df['Absolute SHAP Value'] = shap_df['SHAP Value'].abs()
577
+ top_important_features = shap_df.sort_values(by='Absolute SHAP Value', ascending=False).head(4)
578
+
579
+ # Generate summary of the most important features
580
+ important_features_summary = "\n".join(
581
+ [f"- **{row['Feature']}**: {'Positive' if row['SHAP Value'] > 0 else 'Negative'} contribution"
582
+ for _, row in top_important_features.iterrows()]
583
+ )
584
 
585
+ # Pass the summary to the AI assistant for commentary
586
+ with st.expander("AI's analysis of the results"):
587
+ # Construct the AI input with explicit instructions
588
+ user_input = (
589
+ f"You are an AI financial expert, comment on why the saving habit was deemed likely or unlikely. "
590
+ f"Interpret the features based on these rules: "
591
+ f"1. Blue bars represent negative contributions to saving likelihood, and red bars represent positive contributions. "
592
+ f"2. If 'Employment' is 0, the person is unemployed which is not good for saving money. "
593
+ f"3. If 'Has Internet Access' is 0, the person does not have internet access which indicaites thay maybe the person does not save money as they do not have money for internet. "
594
+ f"4. If 'Is Mobile Owner' is 0, the person does not have mobile access wich indicates that the person does not save money as the person can not afford a phone. "
595
+ f"5. If 'High Income Region' is 0, the person is from a non-high-income region. If the person is from a high income region they a larger change of saving up money "
596
+ f"Here are the key factors and their contributions: {important_features_summary}"
597
+ )
598
+ # Generate AI response
599
+ ai_response = DDGS().chat(user_input, model='gpt-4o-mini')
600
+ st.markdown(ai_response)