Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -445,63 +445,76 @@ elif option == "Financial Recommender Engine":
|
|
| 445 |
st.write(f"- {rec}")
|
| 446 |
else:
|
| 447 |
st.write("Click the 'Get Recommendations' button to receive personalized financial recommendations.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
# User inputs
|
| 456 |
-
income_bracket = st.selectbox("Income Bracket (1 = Lowest, 5 = Highest)", [1, 2, 3, 4, 5])
|
| 457 |
-
has_internet_access = st.radio("Do you have Internet Access?", ["Yes", "No"])
|
| 458 |
-
employed = st.radio("Are you employed?", ["Yes", "No"])
|
| 459 |
-
high_income_region = st.radio("Are you in a High Income Region?", ["Yes", "No"])
|
| 460 |
-
is_mobileowner = st.radio("Are you a Mobileowner?", ["Yes", "No"])
|
| 461 |
-
place_of_living = st.selectbox("Place of Living", ["Urban Area", "Rural Area", "Unknown"])
|
| 462 |
-
education_level = st.selectbox("Education Level", ["Primary", "Secondary", "Tertiary"])
|
| 463 |
-
age_group = st.selectbox("Age Group", ["Teen", "Young Adult", "Adult", "Middle Age", "Older Adult", "Elder", "Senior"])
|
| 464 |
-
|
| 465 |
-
# Prepare input data to match the model's expected features
|
| 466 |
-
input_data = pd.DataFrame({
|
| 467 |
-
'Income Bracket': [income_bracket],
|
| 468 |
-
'Has Internet Access': [1 if has_internet_access == "Yes" else 0],
|
| 469 |
-
'Employed': [1 if employed == "Yes" else 0],
|
| 470 |
-
'High Income Region': [1 if high_income_region == "Yes" else 0],
|
| 471 |
-
'Is Mobileowner': [1 if is_mobileowner == "Yes" else 0],
|
| 472 |
-
'Place of living_Unknown': [1 if place_of_living == "Unknown" else 0],
|
| 473 |
-
'Place of living_Urban Area': [1 if place_of_living == "Urban Area" else 0],
|
| 474 |
-
'Place of living_Rural Area': [1 if place_of_living == "Rural Area" else 0],
|
| 475 |
-
'Education Level_Primary': [1 if education_level == "Primary" else 0],
|
| 476 |
-
'Education Level_Secondary': [1 if education_level == "Secondary" else 0],
|
| 477 |
-
'Education Level_Tertiary': [1 if education_level == "Tertiary" else 0],
|
| 478 |
-
'Age Group_Teen': [1 if age_group == "Teen" else 0],
|
| 479 |
-
'Age Group_Young Adult': [1 if age_group == "Young Adult" else 0],
|
| 480 |
-
'Age Group_Adult': [1 if age_group == "Adult" else 0],
|
| 481 |
-
'Age Group_Middle Age': [1 if age_group == "Middle Age" else 0],
|
| 482 |
-
'Age Group_Older Adult': [1 if age_group == "Older Adult" else 0],
|
| 483 |
-
'Age Group_Elder': [1 if age_group == "Elder" else 0],
|
| 484 |
-
'Age Group_Senior': [1 if age_group == "Senior" else 0]
|
| 485 |
-
})
|
| 486 |
-
|
| 487 |
-
# One-hot encode the input data
|
| 488 |
-
input_data_encoded = pd.DataFrame(ohe.transform(input_data).todense(), columns=ohe.get_feature_names_out())
|
| 489 |
-
|
| 490 |
-
# Prediction
|
| 491 |
-
if st.button("Predict"):
|
| 492 |
-
# Predict using the loaded model
|
| 493 |
-
prediction = xgb_model.predict(input_data_encoded)[0]
|
| 494 |
-
st.write(f"Prediction: {prediction}")
|
| 495 |
|
| 496 |
-
#
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
st.markdown("""
|
| 502 |
-
This plot shows how each feature contributes to the
|
| 503 |
-
- Blue bars push the
|
| 504 |
-
- Red bars push the
|
| 505 |
""")
|
| 506 |
|
| 507 |
-
#
|
|
|
|
|
|
| 445 |
st.write(f"- {rec}")
|
| 446 |
else:
|
| 447 |
st.write("Click the 'Get Recommendations' button to receive personalized financial recommendations.")
|
| 448 |
+
|
| 449 |
+
elif option == "Income Bracket & SHAP Analysis":
|
| 450 |
+
st.markdown("<h2 style='text-align: center;'>Income Bracket & SHAP Analysis</h2>", unsafe_allow_html=True)
|
| 451 |
+
|
| 452 |
+
st.subheader('Enter your information')
|
| 453 |
+
|
| 454 |
+
# Categorical Inputs
|
| 455 |
+
place_of_living = st.selectbox('Place of Living', ['Urban Area', 'Rural Area', 'Unknown'])
|
| 456 |
+
education_level = st.selectbox('Education Level', ['Primary', 'Secondary', 'Tertiary'])
|
| 457 |
+
age_group = st.selectbox('Age Group', ['Adult', 'Middle Age', 'Older Adult', 'Senior', 'Teen', 'Young Adult'])
|
| 458 |
+
income_bracket = st.selectbox('Income Bracket', [1, 2, 3, 4, 5])
|
| 459 |
+
|
| 460 |
+
# Binary Inputs
|
| 461 |
+
female = st.radio('Gender', ['Female', 'Male'])
|
| 462 |
+
is_mobileowner = st.checkbox('Is Mobile Owner?')
|
| 463 |
+
has_internet_access = st.checkbox('Has Internet Access?')
|
| 464 |
+
employed = st.checkbox('Employed?')
|
| 465 |
+
high_income_region = st.checkbox('High Income Region?')
|
| 466 |
+
|
| 467 |
+
# Prediction button
|
| 468 |
+
if st.button('Predict Saving Behavior'):
|
| 469 |
+
# Prepare categorical features
|
| 470 |
+
cat_features = pd.DataFrame({
|
| 471 |
+
'Place of living': [place_of_living],
|
| 472 |
+
'Education Level': [education_level],
|
| 473 |
+
'Age Group': [age_group],
|
| 474 |
+
'Income Bracket': [income_bracket] # No longer using income_value
|
| 475 |
+
})
|
| 476 |
+
|
| 477 |
+
# Transform categorical features using the loaded OneHotEncoder
|
| 478 |
+
cat_encoded = pd.DataFrame(ohe.transform(cat_features).todense(),
|
| 479 |
+
columns=ohe.get_feature_names_out(['Place of living', 'Education Level', 'Age Group', 'Income Bracket']))
|
| 480 |
|
| 481 |
+
# Ensure all expected columns are present
|
| 482 |
+
expected_columns = ohe.get_feature_names_out(['Place of living', 'Education Level', 'Age Group', 'Income Bracket'])
|
| 483 |
+
for col in expected_columns:
|
| 484 |
+
if col not in cat_encoded.columns:
|
| 485 |
+
cat_encoded[col] = 0 # Add missing columns with a default value of 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
|
| 487 |
+
# Reorder columns to match the expected order
|
| 488 |
+
cat_encoded = cat_encoded[expected_columns]
|
| 489 |
+
|
| 490 |
+
# Prepare binary and numerical features
|
| 491 |
+
num_features = pd.DataFrame({
|
| 492 |
+
'Female': [1 if female == 'Female' else 0],
|
| 493 |
+
'Is Mobileowner': [1 if is_mobileowner else 0],
|
| 494 |
+
'Has Internet Access': [1 if has_internet_access else 0],
|
| 495 |
+
'Employed': [1 if employed else 0],
|
| 496 |
+
'High Income Region': [1 if high_income_region else 0]
|
| 497 |
+
})
|
| 498 |
+
|
| 499 |
+
# Combine categorical and numerical features
|
| 500 |
+
features = pd.concat([num_features, cat_encoded], axis=1)
|
| 501 |
|
| 502 |
+
# Make the prediction
|
| 503 |
+
predicted_saved = xgb_model.predict(features)[0]
|
| 504 |
+
|
| 505 |
+
# Display prediction result
|
| 506 |
+
st.metric(label="Predicted Probability of Saving", value=f'{round(predicted_saved * 100, 2)}%')
|
| 507 |
+
|
| 508 |
+
# SHAP explanation
|
| 509 |
+
st.subheader('Feature Contributions 🤖')
|
| 510 |
+
shap_values = explainer.shap_values(features)
|
| 511 |
+
st_shap(shap.force_plot(explainer.expected_value, shap_values, features), height=400, width=600)
|
| 512 |
+
|
| 513 |
st.markdown("""
|
| 514 |
+
This plot shows how each feature contributes to the predicted likelihood of saving:
|
| 515 |
+
- Blue bars push the probability lower
|
| 516 |
+
- Red bars push the probability higher
|
| 517 |
""")
|
| 518 |
|
| 519 |
+
# Continue with other sections such as "Regional-Based Analysis", "Income-Based Analysis", etc.
|
| 520 |
+
# ...
|