Bjerring98 committed on
Commit
a1324a4
·
verified ·
1 Parent(s): 6aa72d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -54
app.py CHANGED
@@ -445,63 +445,76 @@ elif option == "Financial Recommender Engine":
445
  st.write(f"- {rec}")
446
  else:
447
  st.write("Click the 'Get Recommendations' button to receive personalized financial recommendations.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
 
449
- elif option == "SML Classification":
450
- st.title("SML Classification - Financial Product Prediction")
451
-
452
- # Collect user inputs for prediction
453
- st.markdown("### Provide the details to predict the financial product:")
454
-
455
- # User inputs
456
- income_bracket = st.selectbox("Income Bracket (1 = Lowest, 5 = Highest)", [1, 2, 3, 4, 5])
457
- has_internet_access = st.radio("Do you have Internet Access?", ["Yes", "No"])
458
- employed = st.radio("Are you employed?", ["Yes", "No"])
459
- high_income_region = st.radio("Are you in a High Income Region?", ["Yes", "No"])
460
- is_mobileowner = st.radio("Are you a Mobileowner?", ["Yes", "No"])
461
- place_of_living = st.selectbox("Place of Living", ["Urban Area", "Rural Area", "Unknown"])
462
- education_level = st.selectbox("Education Level", ["Primary", "Secondary", "Tertiary"])
463
- age_group = st.selectbox("Age Group", ["Teen", "Young Adult", "Adult", "Middle Age", "Older Adult", "Elder", "Senior"])
464
-
465
- # Prepare input data to match the model's expected features
466
- input_data = pd.DataFrame({
467
- 'Income Bracket': [income_bracket],
468
- 'Has Internet Access': [1 if has_internet_access == "Yes" else 0],
469
- 'Employed': [1 if employed == "Yes" else 0],
470
- 'High Income Region': [1 if high_income_region == "Yes" else 0],
471
- 'Is Mobileowner': [1 if is_mobileowner == "Yes" else 0],
472
- 'Place of living_Unknown': [1 if place_of_living == "Unknown" else 0],
473
- 'Place of living_Urban Area': [1 if place_of_living == "Urban Area" else 0],
474
- 'Place of living_Rural Area': [1 if place_of_living == "Rural Area" else 0],
475
- 'Education Level_Primary': [1 if education_level == "Primary" else 0],
476
- 'Education Level_Secondary': [1 if education_level == "Secondary" else 0],
477
- 'Education Level_Tertiary': [1 if education_level == "Tertiary" else 0],
478
- 'Age Group_Teen': [1 if age_group == "Teen" else 0],
479
- 'Age Group_Young Adult': [1 if age_group == "Young Adult" else 0],
480
- 'Age Group_Adult': [1 if age_group == "Adult" else 0],
481
- 'Age Group_Middle Age': [1 if age_group == "Middle Age" else 0],
482
- 'Age Group_Older Adult': [1 if age_group == "Older Adult" else 0],
483
- 'Age Group_Elder': [1 if age_group == "Elder" else 0],
484
- 'Age Group_Senior': [1 if age_group == "Senior" else 0]
485
- })
486
-
487
- # One-hot encode the input data
488
- input_data_encoded = pd.DataFrame(ohe.transform(input_data).todense(), columns=ohe.get_feature_names_out())
489
-
490
- # Prediction
491
- if st.button("Predict"):
492
- # Predict using the loaded model
493
- prediction = xgb_model.predict(input_data_encoded)[0]
494
- st.write(f"Prediction: {prediction}")
495
 
496
- # SHAP explanation
497
- st.subheader('Factors Influencing Prediction 🤖')
498
- shap_values = explainer.shap_values(input_data_encoded)
499
- st_shap(shap.force_plot(explainer.expected_value, shap_values, input_data_encoded), height=400, width=600)
 
 
 
 
 
 
 
 
 
 
500
 
 
 
 
 
 
 
 
 
 
 
 
501
  st.markdown("""
502
- This plot shows how each feature contributes to the prediction:
503
- - Blue bars push the prediction lower
504
- - Red bars push the prediction higher
505
  """)
506
 
507
- # Add other elif blocks for 'Description of Variables', 'Regional-Based Analysis', 'Income-Based Analysis', and 'Gender-Based Analysis'
 
 
445
  st.write(f"- {rec}")
446
  else:
447
  st.write("Click the 'Get Recommendations' button to receive personalized financial recommendations.")
448
+
449
+ elif option == "Income Bracket & SHAP Analysis":
450
+ st.markdown("<h2 style='text-align: center;'>Income Bracket & SHAP Analysis</h2>", unsafe_allow_html=True)
451
+
452
+ st.subheader('Enter your information')
453
+
454
+ # Categorical Inputs
455
+ place_of_living = st.selectbox('Place of Living', ['Urban Area', 'Rural Area', 'Unknown'])
456
+ education_level = st.selectbox('Education Level', ['Primary', 'Secondary', 'Tertiary'])
457
+ age_group = st.selectbox('Age Group', ['Adult', 'Middle Age', 'Older Adult', 'Senior', 'Teen', 'Young Adult'])
458
+ income_bracket = st.selectbox('Income Bracket', [1, 2, 3, 4, 5])
459
+
460
+ # Binary Inputs
461
+ female = st.radio('Gender', ['Female', 'Male'])
462
+ is_mobileowner = st.checkbox('Is Mobile Owner?')
463
+ has_internet_access = st.checkbox('Has Internet Access?')
464
+ employed = st.checkbox('Employed?')
465
+ high_income_region = st.checkbox('High Income Region?')
466
+
467
+ # Prediction button
468
+ if st.button('Predict Saving Behavior'):
469
+ # Prepare categorical features
470
+ cat_features = pd.DataFrame({
471
+ 'Place of living': [place_of_living],
472
+ 'Education Level': [education_level],
473
+ 'Age Group': [age_group],
474
+ 'Income Bracket': [income_bracket] # No longer using income_value
475
+ })
476
+
477
+ # Transform categorical features using the loaded OneHotEncoder
478
+ cat_encoded = pd.DataFrame(ohe.transform(cat_features).todense(),
479
+ columns=ohe.get_feature_names_out(['Place of living', 'Education Level', 'Age Group', 'Income Bracket']))
480
 
481
+ # Ensure all expected columns are present
482
+ expected_columns = ohe.get_feature_names_out(['Place of living', 'Education Level', 'Age Group', 'Income Bracket'])
483
+ for col in expected_columns:
484
+ if col not in cat_encoded.columns:
485
+ cat_encoded[col] = 0 # Add missing columns with a default value of 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
 
487
+ # Reorder columns to match the expected order
488
+ cat_encoded = cat_encoded[expected_columns]
489
+
490
+ # Prepare binary and numerical features
491
+ num_features = pd.DataFrame({
492
+ 'Female': [1 if female == 'Female' else 0],
493
+ 'Is Mobileowner': [1 if is_mobileowner else 0],
494
+ 'Has Internet Access': [1 if has_internet_access else 0],
495
+ 'Employed': [1 if employed else 0],
496
+ 'High Income Region': [1 if high_income_region else 0]
497
+ })
498
+
499
+ # Combine categorical and numerical features
500
+ features = pd.concat([num_features, cat_encoded], axis=1)
501
 
502
+ # Make the prediction
503
+ predicted_saved = xgb_model.predict(features)[0]
504
+
505
+ # Display prediction result
506
+ st.metric(label="Predicted Probability of Saving", value=f'{round(predicted_saved * 100, 2)}%')
507
+
508
+ # SHAP explanation
509
+ st.subheader('Feature Contributions 🤖')
510
+ shap_values = explainer.shap_values(features)
511
+ st_shap(shap.force_plot(explainer.expected_value, shap_values, features), height=400, width=600)
512
+
513
  st.markdown("""
514
+ This plot shows how each feature contributes to the predicted likelihood of saving:
515
+ - Blue bars push the probability lower
516
+ - Red bars push the probability higher
517
  """)
518
 
519
+ # Continue with other sections such as "Regional-Based Analysis", "Income-Based Analysis", etc.
520
+ # ...