Bjerring98 committed on
Commit
a485c7a
·
verified ·
1 Parent(s): ee26648

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. app.py +473 -0
  3. data_cleaned_new.csv +3 -0
  4. ohe.joblib +3 -0
  5. requirements.txt +12 -0
  6. xgb_model.joblib +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data_cleaned_new.csv filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,473 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import altair as alt
7
+ import plotly.express as px
8
+ from sklearn.model_selection import train_test_split
9
+ import joblib
10
+ import shap as shap
11
+ from xgboost import XGBClassifier
12
+ from sklearn.preprocessing import OneHotEncoder
13
+ from streamlit_shap import st_shap
14
+
15
+ # Cache the data
16
@st.cache_data
def load_data():
    """Read the cleaned survey extract into a DataFrame.

    The function is wrapped in ``st.cache_data`` so the CSV is not re-parsed
    on every script rerun. The legacy ``st.cache`` decorator is deprecated in
    favour of ``st.cache_data`` / ``st.cache_resource``, and the model loader
    in this file already uses the newer API, so this keeps both loaders
    consistent.

    Returns:
        pandas.DataFrame: The contents of ``data_cleaned_new.csv``.
    """
    return pd.read_csv('data_cleaned_new.csv')
20
+
21
@st.cache_resource
def load_model_and_encoder():
    """Load the persisted model and encoder artifacts with joblib.

    Returns:
        tuple: ``(xgb_model, ohe)`` — the objects stored in
        ``xgb_model.joblib`` and ``ohe.joblib``, in that order.
    """
    artifact_paths = ('xgb_model.joblib', 'ohe.joblib')
    # Generator is consumed left to right, so the model is loaded first.
    model, encoder = (joblib.load(path) for path in artifact_paths)
    return model, encoder
26
+
27
# Cached artifacts: classifier + encoder first, then the survey data.
xgb_model, ohe = load_model_and_encoder()
data = load_data()

# Everything inside this context manager is rendered in the sidebar.
with st.sidebar:
    st.title("Explore Financial Insights")

    # The selected label drives the page dispatch further down the script.
    option = st.radio(
        "Select an analysis section:",
        (
            "Home",
            'Description of Variables',
            "Regional-Based Analysis",
            "Income-Based Analysis",
            "Gender-Based Analysis",
            "Financial Recommender Engine",
            "SML Classification",
        ),
    )

    # Short background blurb about the Findex database below the picker.
    st.markdown("### What is Findex?")
    st.write("""
    The Global Findex database provides comprehensive data on how adults worldwide save, borrow, make payments, and manage risk.
    Launched with support from the Bill & Melinda Gates Foundation, the database is updated every three years and is the world’s most
    detailed dataset on how adults use formal and informal financial services. It offers insights into the financial behaviors and
    access to financial systems globally.

    For more information, visit the [Global Findex website](https://www.worldbank.org/en/publication/globalfindex).
    """)
50
+
51
+ # Main section logic
52
if option == "Home":
    # Share of survey rows per economy, expressed in percent (2 decimals).
    economy_df = (
        data['Country_Economy']
        .value_counts(normalize=True)
        .mul(100)
        .reset_index()
    )
    economy_df.columns = ['Country_Economy', 'percentage']
    economy_df['percentage'] = economy_df['percentage'].round(2)

    # Choropleth keyed on country names, shaded with a green scale.
    fig = px.choropleth(
        economy_df,
        locations='Country_Economy',
        locationmode='country names',
        color='percentage',
        hover_name='Country_Economy',
        hover_data={'percentage': ':.2f'},
        color_continuous_scale='Greens',
    )

    # Custom hover text so the value is followed by a percent sign.
    fig.update_traces(
        hovertemplate=(
            "<b>%{hovertext}</b><br>"
            "percentage=%{z:.2f}%<extra></extra>"
        ),
        hovertext=economy_df['Country_Economy'],
    )

    # Layout pieces are named separately to keep the update call readable.
    chart_title = {
        'text': "FINDEX 2021 Data Visualizer",
        'font': {'size': 49, 'color': 'black', 'family': "Raleway, sans-serif"},
        'x': 0.5,  # horizontally centered
        'xanchor': 'center',
        'y': 0.95,
        'yanchor': 'top',
        'pad': {'t': 20},
    }
    globe = {
        'showframe': True,
        'framecolor': "black",
        'showcoastlines': True,
        'coastlinecolor': "Black",
        'projection_type': 'orthographic',  # globe-like projection
        'projection_scale': 0.85,           # below 1 zooms out
        'center': {'lat': 10, 'lon': 0},
        'lataxis_range': [-85, 85],
        'lonaxis_range': [-180, 180],
        'oceancolor': 'lightblue',
        'showocean': True,
    }
    colorbar = {
        'title': "Participation (%)",
        'len': 0.5,
        'thickness': 15,
        'tickvals': [0.5, 1, 1.5, 2],
        'ticks': "outside",
    }
    fig.update_layout(
        title=chart_title,
        geo=globe,
        coloraxis_colorbar=colorbar,
        width=1000,
        height=800,
        margin={"r": 50, "t": 50, "l": 0, "b": 0},
    )

    # The globe is shown first, followed by the introductory text.
    st.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False})

    st.markdown("""
    This application leverages the Global Findex 2021 dataset with over 140,000 participants to explore financial inclusion and behavior across various economies worldwide.

    Key features of this application include:
    - **Quick Visualization**: Instantly visualize the percentage of respondents from each country who participate in various financial services.
    - **Regional Analysis**: Explore financial trends and behaviors by country and region, identifying disparities in access to financial systems.
    - **Income-Based Analysis**: Analyze financial behaviors like savings, borrowing, and digital payments across different income levels.
    - **Gender-Based Analysis**: Compare financial inclusion patterns between genders, looking into variables such as account ownership, borrowing, and savings behavior.
    """)
129
+
130
+ elif option == "Description of Variables":
131
+ st.markdown("<h2 style='text-align: center;'>Descripton of Variables</h2>", unsafe_allow_html=True)
132
+ st.markdown("""
133
+ - **Country_Economy**: The name of the country or economy.
134
+ - **Country_Code**: ISO 3-digit code representing each economy.
135
+ - **WorldBank_Region**: World Bank region classification (e.g., Sub-Saharan Africa, East Asia, etc.).
136
+ - **Adult_Population**: The population of adults (aged 15+) in the economy.
137
+ - **Respondent_ID**: A unique identifier for each respondent in the dataset.
138
+ - **Survey_Weight**: Survey weight for each respondent, used to make the sample representative of the population.
139
+ - **Gender**: Gender of the respondent (1 if female, 2 if male).
140
+ - **Respondent_Age**: Age of the respondent.
141
+ - **Education_Level**: Respondent’s education level from level 1 to 3.
142
+ - **Income_Quintile**: Income quintile of the respondent’s household.
143
+ - **Employment_Status**: Employment status of the respondent.
144
+ - **Account_At_Financial_Or_Mobile_Money_Provider**: Whether the respondent has an account at a financial institution or with a mobile money service provider.
145
+ - **Account_At_Formal_Financial_Institution**: Whether the respondent has an account at a formal financial institution.
146
+ - **Has_Debit_Card**: Has a debit card.
147
+ - **Used_Mobile_Money**: Whether the respondent used mobile money.
148
+ - **Paid_Bills_Online**: Made bill payments online using the Internet.
149
+ - **Sent_Money_To_Relative_Friend_Online**: Sent money to a relative or friend online using the Internet.
150
+ - **Bought_Something_Online**: Bought something online using the Internet.
151
+ - **Saved_For_Old_Age**: Saved for old age.
152
+ - **Saved_At_Formal_Financial_Institution**: Saved using an account at a financial institution.
153
+ - **Borrowed_For_Medical_Purposes**: Borrowed for medical purposes.
154
+ - **Borrowed_From_Formal_Financial_Institution**: Borrowed from a financial institution.
155
+ - **Borrowed_From_Family_Or_Friends**: Borrowed from family or friends.
156
+ - **Main_Source_Of_Emergency_Funds_30_Days**: Main source of emergency funds in 30 days.
157
+ - **Paid_Utility_Bill**: Paid a utility bill.
158
+ - **Received_Wage_Payments**: Received wage payments.
159
+ - **Received_Government_Transfer**: Received a government transfer.
160
+ - **Received_Government_Pension**: Received a government pension.
161
+ - **Financial_Worry_Old_Age**: Financially worried: old age.
162
+ - **Financial_Worry_Medical_Cost**: Financially worried: medical cost.
163
+ - **Financial_Worry_Bills**: Financially worried: bills.
164
+ - **Financial_Worry_Education**: Financially worried: education.
165
+ - **Saved_Money_Past_12_Months**: Saved money in the past 12 months.
166
+ - **Borrowed_Money_Past_12_Months**: Borrowed money in the past 12 months.
167
+ - **Received_Wage_Payment_And_Method**: Received a wage payment and method.
168
+ - **Received_Gov_Transfer_Or_Aid_And_Method**: Received government transfers or aid payments and method.
169
+ - **Received_Gov_Pension_Payments_And_Method**: Received government pension payments and method.
170
+ - **Paid_Utility_Bills_And_Method**: Paid utility bills and method.
171
+ - **Owns_Mobile_Phone**: Whether the respondent owns a mobile phone.
172
+ - **Has_Internet_Access**: Whether the respondent has access to the internet.
173
+ - **Made_Digital_Payment**: Whether the respondent made any digital payment.
174
+ - **Data_Collection_Year**: The year of the data collection.
175
+ """)
176
+
177
+ # Main section logic for each page
178
+ if option == "Regional-Based Analysis":
179
+ st.markdown("<h2 style='text-align: center;'>Regional-Based Analysis</h2>", unsafe_allow_html=True)
180
+ st.write("This section allows you to explore financial trends and behaviors, including savings, borrowing, and digital payments, across various regions. You can compare how access to financial systems differs between regions and examine disparities in financial inclusion globally.")
181
+
182
+ # Create a dictionary mapping original column names to human-readable labels
183
+ variable_labels = {
184
+ 'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
185
+ 'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
186
+ 'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months',
187
+ 'Paid_Bills_Online': 'Paid Bills Online',
188
+ 'Financial_Worry_Old_Age': 'Financial Worry Old Age',
189
+ 'Owns_Mobile_Phone': 'Owns Mobile Phone',
190
+ 'Has_Internet_Access': 'Has Internet Access',
191
+ 'Made_Digital_Payment': 'Made Digital Payment'
192
+ }
193
+
194
+ # List of regions from your dataset (assuming 'WorldBank_Region' column holds this data)
195
+ regions = data['WorldBank_Region'].unique()
196
+
197
+ # Multiselect for region selection
198
+ selected_regions = st.multiselect("Select regions to compare", options=regions, default=regions[0])
199
+
200
+ # Filter data based on selected regions
201
+ regional_data = data[data['WorldBank_Region'].isin(selected_regions)]
202
+
203
+ # Allow user to choose which variable they want to analyze (displayed without underscores)
204
+ variable_to_compare = st.selectbox(
205
+ "Select variable to analyze:",
206
+ options=list(variable_labels.keys()),
207
+ format_func=lambda x: variable_labels[x] # Replaces underscores with spaces in dropdown
208
+ )
209
+
210
+ # Summarize the data for the selected regions and variable, including education level (educ_label)
211
+ summary = regional_data.groupby(['WorldBank_Region', 'educ_label'])[variable_to_compare].mean().reset_index()
212
+ summary.columns = ['WorldBank_Region', 'Education_Level', f'Average {variable_to_compare}']
213
+
214
+ # Multiply the average by 100 to display percentages
215
+ summary[f'Average {variable_to_compare}'] = summary[f'Average {variable_to_compare}'].mul(100).round(2)
216
+
217
+ # Create an interactive Plotly bar chart to compare the regions and education levels
218
+ fig = px.bar(summary, x='WorldBank_Region', y=f'Average {variable_to_compare}',
219
+ color='Education_Level',
220
+ title=f"Comparison of {variable_labels[variable_to_compare]} Across Selected Regions and Education Levels",
221
+ labels={'WorldBank_Region': 'Region', f'Average {variable_to_compare}': f'Average {variable_labels[variable_to_compare]} (%)'},
222
+ barmode='group')
223
+
224
+ # Update layout for better aesthetics
225
+ fig.update_layout(
226
+ xaxis_title="Region",
227
+ yaxis_title=f"Average {variable_labels[variable_to_compare]} (%)",
228
+ showlegend=True,
229
+ width=800,
230
+ height=500,
231
+ margin={"r":0,"t":50,"l":0,"b":50},
232
+ )
233
+
234
+ # Show the chart in Streamlit
235
+ st.plotly_chart(fig)
236
+
237
+ # Summary of the analysis (formatting variable name)
238
+ st.markdown(f"### Summary of Regional and Educational Insights")
239
+ st.write(f"The analysis above shows how {variable_labels[variable_to_compare]} differs across regions and education levels.")
240
+ st.write("Key takeaways:")
241
+ for region in selected_regions:
242
+ region_data = summary[summary['WorldBank_Region'] == region]
243
+ for educ_level in region_data['Education_Level'].unique():
244
+ avg_value = region_data[region_data['Education_Level'] == educ_level][f'Average {variable_to_compare}'].values[0]
245
+ st.write(f"- In {region}, individuals with {educ_level} have an average {variable_labels[variable_to_compare].lower()} of {avg_value:.0f}%.")
246
+
247
+
248
+ elif option == "Income-Based Analysis":
249
+ st.markdown("<h2 style='text-align: center;'>Income-Based Analysis</h2>", unsafe_allow_html=True)
250
+ st.write("This section allows you to analyze financial behaviors such as savings, borrowing, and digital payments across different income levels.")
251
+
252
+ # Create a dictionary mapping original column names to human-readable labels
253
+ variable_labels_income = {
254
+ 'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
255
+ 'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
256
+ 'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months',
257
+ 'Made_Digital_Payment': 'Made Digital Payment'
258
+ }
259
+
260
+ # Select Income Quintile
261
+ income_quintile = st.selectbox("Select Income Quintile:", data['Income_Quintile'].unique())
262
+
263
+ # Filter the data based on the selected income quintile
264
+ filtered_data_income = data[data['Income_Quintile'] == income_quintile]
265
+
266
+ # Multi-select for financial indicators (displayed without underscores)
267
+ selected_indicators_income = st.multiselect(
268
+ "Select Financial Indicators to Analyze:",
269
+ options=list(variable_labels_income.keys()),
270
+ format_func=lambda x: variable_labels_income[x], # Format options without underscores
271
+ default='Account_At_Financial_Or_Mobile_Money_Provider' # Default is financial account ownership
272
+ )
273
+
274
+ st.markdown(f"### Analysis for Income Quintile {income_quintile}")
275
+
276
+ # Initialize a dictionary to store the summary for income analysis
277
+ income_summary_dict = {}
278
+
279
+ # Loop through selected indicators and create a chart for each
280
+ for indicator in selected_indicators_income:
281
+ # Normalize and calculate the percentage for the selected indicator
282
+ income_indicator_chart = filtered_data_income[indicator].value_counts(normalize=True).mul(100).reset_index()
283
+ income_indicator_chart.columns = [indicator, 'Percentage']
284
+
285
+ # Get the percentage of people with the selected financial indicator
286
+ has_indicator_income = income_indicator_chart[income_indicator_chart[indicator] == 1]['Percentage'].values[0] if 1 in income_indicator_chart[indicator].values else 0
287
+ income_summary_dict[indicator] = has_indicator_income
288
+
289
+ # Create a bar chart for each selected indicator (labels without underscores)
290
+ fig_income = px.bar(
291
+ income_indicator_chart,
292
+ x=indicator,
293
+ y='Percentage',
294
+ title=f"{variable_labels_income[indicator]} for Income Quintile {income_quintile}",
295
+ labels={indicator: variable_labels_income[indicator]},
296
+ color=indicator,
297
+ color_continuous_scale='Blues'
298
+ )
299
+
300
+ st.plotly_chart(fig_income)
301
+
302
+ # Print out the summary text at the bottom for income analysis
303
+ st.markdown("### Summary")
304
+ for indicator, percentage in income_summary_dict.items():
305
+ st.write(f"**{percentage:.1f}% of respondents in Income Quintile {income_quintile} have {variable_labels_income[indicator]}**.")
306
+
307
+
308
+ elif option == "Gender-Based Analysis":
309
+ st.markdown("<h2 style='text-align: center;'>Gender-Based Analysis</h2>", unsafe_allow_html=True)
310
+ st.write("Here you can analyze financial behaviors such as savings, borrowing, and digital payments for selected gender and age groups.")
311
+
312
+ # Create a dictionary mapping original column names to human-readable labels
313
+ variable_labels_gender = {
314
+ 'Account_At_Financial_Or_Mobile_Money_Provider': 'Account at Financial or Mobile Money Provider',
315
+ 'Saved_Money_Past_12_Months': 'Saved Money Past 12 Months',
316
+ 'Borrowed_Money_Past_12_Months': 'Borrowed Money Past 12 Months',
317
+ 'Made_Digital_Payment': 'Made Digital Payment'
318
+ }
319
+
320
+ # Gender selection
321
+ gender = st.radio("Select Gender:", ("Female", "Male"))
322
+
323
+ # Age group selection (assuming 'age_group' column is already in the dataset)
324
+ age_group = st.selectbox("Select Age Group:", data['age_group'].unique())
325
+
326
+ # Convert gender to appropriate coding (assuming female=1, male=2 in the dataset)
327
+ gender_code = 1 if gender == "Female" else 2
328
+
329
+ # Filter the data based on gender and age group
330
+ filtered_data = data[(data['Gender'] == gender_code) & (data['age_group'] == age_group)]
331
+
332
+ # Multi-select for financial indicators (displayed without underscores)
333
+ selected_indicators = st.multiselect(
334
+ "Select Financial Indicators to Analyze:",
335
+ options=list(variable_labels_gender.keys()),
336
+ format_func=lambda x: variable_labels_gender[x], # Format options without underscores
337
+ default=['Account_At_Financial_Or_Mobile_Money_Provider']
338
+ )
339
+
340
+ st.markdown(f"### Analysis for {gender}s in {age_group} Age Group")
341
+
342
+ # Initialize a dictionary to store the summary
343
+ summary_dict = {}
344
+
345
+ # Loop through selected indicators and create a chart for each
346
+ for indicator in selected_indicators:
347
+ # Normalize and calculate the percentage for the selected indicator
348
+ indicator_chart = filtered_data[indicator].value_counts(normalize=True).mul(100).reset_index()
349
+ indicator_chart.columns = [indicator, 'Percentage']
350
+
351
+ # Get the percentage of people with the selected financial indicator
352
+ has_indicator = indicator_chart[indicator_chart[indicator] == 1]['Percentage'].values[0] if 1 in indicator_chart[indicator].values else 0
353
+ summary_dict[indicator] = has_indicator
354
+
355
+ # Create a bar chart for each selected indicator (without underscores in labels)
356
+ fig = px.bar(
357
+ indicator_chart,
358
+ x=indicator,
359
+ y='Percentage',
360
+ title=f"{variable_labels_gender[indicator]} for {gender}s in {age_group} Age Group",
361
+ labels={indicator: variable_labels_gender[indicator]},
362
+ color=indicator,
363
+ color_continuous_scale='Viridis'
364
+ )
365
+
366
+ st.plotly_chart(fig)
367
+
368
+ # Print out the summary text at the bottom
369
+ st.markdown("### Summary")
370
+ for indicator, percentage in summary_dict.items():
371
+ st.write(f"**{percentage:.1f}% of {gender}s in the {age_group} age group have {variable_labels_gender[indicator]}**.")
372
+
373
+ elif option == "Financial Recommender Engine":
374
+ st.markdown("<h2 style='text-align: center;'>Financial Recommender Engine</h2>", unsafe_allow_html=True)
375
+
376
+ # Define the enhanced_recommender function inside the elif block
377
def enhanced_recommender(age, income_quintile, has_debit_card, uses_mobile_money, financial_goal, savings_habit, investment_interest):
    """Build a list of rule-based financial tips from the user's answers.

    Tips are appended in a fixed order: debit card, mobile money, age band,
    income quintile, financial goal, savings habit, investment interest.

    Args:
        age: Age, compared against the 30 and 50 thresholds.
        income_quintile: Income quintile; values below 3 get the
            lower-income tip, everything else the higher-income tip.
        has_debit_card: A tip is added only when this equals "No".
        uses_mobile_money: A tip is added only when this equals "No".
        financial_goal: "Retirement", "Home Ownership" or "Education"; any
            other value adds no goal-specific tip.
        savings_habit: "No" yields the get-started tip, anything else the
            keep-going tip.
        investment_interest: "Yes" yields the investing tip, anything else
            the start-small tip.

    Returns:
        list[str]: The recommendation texts, in the order described above.
    """
    tips = []

    # Products the user does not have or use yet.
    product_tips = (
        (has_debit_card, "Consider getting a debit card. Debit cards offer secure, convenient access to your money and can help you manage day-to-day transactions."),
        (uses_mobile_money, "Mobile money services are a great way to manage transactions remotely and even save small amounts. Consider trying them for increased financial flexibility."),
    )
    tips.extend(text for answer, text in product_tips if answer == "No")

    # Life-stage advice.
    if age < 30:
        tips.append("Starting early is key to long-term financial success! Focus on building a savings habit and avoid unnecessary debt.")
    elif 30 <= age <= 50:
        tips.append("This is the perfect time to focus on increasing savings and planning for long-term goals like buying a home or preparing for children's education.")
    elif age > 50:
        tips.append("As you near retirement, focus on low-risk investments and savings. Consider discussing retirement plans with a financial advisor.")

    # Income-level advice.
    tips.append(
        "You might be eligible for government support programs or financial assistance tailored to lower-income groups. Explore these options to improve your financial stability."
        if income_quintile < 3
        else "With a higher income level, consider diversifying your investments, including retirement savings and possibly high-return investments like stocks or real estate."
    )

    # Goal-specific advice; unknown goals contribute nothing.
    goal_tips = (
        ("Retirement", "It's important to have a solid retirement plan. Focus on long-term, stable investments like pension funds or bonds."),
        ("Home Ownership", "Buying a home is a big goal. Consider saving aggressively or exploring mortgage options to make your goal achievable."),
        ("Education", "Education savings can benefit from high-interest savings accounts or tax-advantaged education accounts."),
    )
    for goal, text in goal_tips:
        if financial_goal == goal:
            tips.append(text)
            break

    # Savings habit.
    tips.append(
        "Starting a savings habit, even if it's a small amount each month, can build your financial security over time."
        if savings_habit == "No"
        else "Good job on saving! Consider increasing the amount or exploring higher-yield savings accounts or investments."
    )

    # Appetite for investing.
    tips.append(
        "Since you're interested in investing, explore stocks, mutual funds, or real estate. A financial advisor can help you find the right options."
        if investment_interest == "Yes"
        else "If you're unsure about investing, start small with safer options like government bonds or index funds."
    )

    return tips
422
+
423
+ # Collect user inputs
424
# Questionnaire: every widget feeds one argument of enhanced_recommender.
yes_no_choices = ("Yes", "No")
age = st.slider("Your Age", 18, 70, 30)
income_quintile = st.slider("Income Quintile (1 = Lowest, 5 = Highest)", 1, 5, 3)
has_debit_card = st.radio("Do you have a debit card?", yes_no_choices)
uses_mobile_money = st.radio("Have you used mobile money?", yes_no_choices)
financial_goal = st.radio("What is your main financial goal?", ("Retirement", "Home Ownership", "Education"))
savings_habit = st.radio("Do you have a savings habit?", yes_no_choices)
investment_interest = st.radio("Are you interested in investing?", yes_no_choices)

# Recommendations are only computed on the rerun triggered by the button;
# otherwise the list stays empty and a hint is shown instead.
if st.button("Get Recommendations"):
    recommendations = enhanced_recommender(
        age,
        income_quintile,
        has_debit_card,
        uses_mobile_money,
        financial_goal,
        savings_habit,
        investment_interest,
    )
else:
    recommendations = []

if not recommendations:
    st.write("Click the 'Get Recommendations' button to receive personalized financial recommendations.")
else:
    st.markdown("### Recommendations:")
    for rec in recommendations:
        st.write(f"- {rec}")
446
+
447
+ elif option == "SML Classification":
448
+ st.title("SML Classification - Financial Prediction")
449
+
450
+ # Collect user inputs for prediction
451
+ st.markdown("### Provide the details to predict the financial product:")
452
+ age = st.slider("Your Age", 18, 70, 30)
453
+ income_bracket = st.selectbox("Income Bracket (1 = Lowest, 5 = Highest)", [1, 2, 3, 4, 5])
454
+ has_internet_access = st.radio("Do you have Internet Access?", ["Yes", "No"])
455
+ employed = st.radio("Are you employed?", ["Yes", "No"])
456
+ high_income_region = st.radio("Are you in a High Income Region?", ["Yes", "No"])
457
+
458
+ # Convert user inputs to a DataFrame
459
+ input_data = pd.DataFrame({
460
+ 'Age': [age],
461
+ 'Income Bracket': [income_bracket],
462
+ 'Has Internet Access': [1 if has_internet_access == "Yes" else 0],
463
+ 'Employed': [1 if employed == "Yes" else 0],
464
+ 'High Income Region': [1 if high_income_region == "Yes" else 0]
465
+ })
466
+
467
+ # One-hot encode the categorical features
468
+ input_data_encoded = ohe.transform(input_data)
469
+
470
+ # Make prediction
471
+ if st.button("Predict"):
472
+ prediction = xgb_model.predict(input_data_encoded)
473
+ st.write(f"Prediction: {prediction[0]}")
data_cleaned_new.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:553e345f4038c4979fa3ac06dcc446d3a78743ac9254d466f97e1966d75e95c1
3
+ size 45798088
ohe.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edd8512fb4b7ad72f6e71e55cbb5c7055fb1a582c4f453de0502458115c7c31f
3
+ size 1783
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ matplotlib
5
+ seaborn
6
+ altair
7
+ plotly
8
+ scikit-learn
9
+ joblib
10
+ shap
11
+ xgboost
12
+ streamlit-shap
xgb_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0fb46e809b70cd4ca9eab23274d50f6f36a9a83d67014be19826c37b9b241d
3
+ size 498749