Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,7 +7,7 @@ import altair as alt
|
|
| 7 |
import plotly.express as px
|
| 8 |
from sklearn.model_selection import train_test_split
|
| 9 |
import joblib
|
| 10 |
-
import shap
|
| 11 |
from xgboost import XGBClassifier
|
| 12 |
from sklearn.preprocessing import OneHotEncoder
|
| 13 |
from streamlit_shap import st_shap
|
|
@@ -26,6 +26,8 @@ def load_model_and_encoder():
|
|
| 26 |
|
| 27 |
xgb_model, ohe = load_model_and_encoder()
|
| 28 |
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# Load the data
|
| 31 |
data = load_data()
|
|
@@ -445,29 +447,58 @@ elif option == "Financial Recommender Engine":
|
|
| 445 |
st.write("Click the 'Get Recommendations' button to receive personalized financial recommendations.")
|
| 446 |
|
| 447 |
elif option == "SML Classification":
|
| 448 |
-
st.title("SML Classification - Financial Prediction")
|
| 449 |
|
| 450 |
-
#
|
| 451 |
-
st.markdown("### Provide the details to predict the financial product:")
|
| 452 |
age = st.slider("Your Age", 18, 70, 30)
|
| 453 |
income_bracket = st.selectbox("Income Bracket (1 = Lowest, 5 = Highest)", [1, 2, 3, 4, 5])
|
| 454 |
has_internet_access = st.radio("Do you have Internet Access?", ["Yes", "No"])
|
| 455 |
employed = st.radio("Are you employed?", ["Yes", "No"])
|
| 456 |
high_income_region = st.radio("Are you in a High Income Region?", ["Yes", "No"])
|
| 457 |
-
|
| 458 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
input_data = pd.DataFrame({
|
| 460 |
-
'Age': [age],
|
| 461 |
'Income Bracket': [income_bracket],
|
| 462 |
'Has Internet Access': [1 if has_internet_access == "Yes" else 0],
|
| 463 |
'Employed': [1 if employed == "Yes" else 0],
|
| 464 |
-
'High Income Region': [1 if high_income_region == "Yes" else 0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
})
|
| 466 |
|
| 467 |
# One-hot encode the categorical features
|
| 468 |
-
input_data_encoded = ohe.transform(input_data)
|
| 469 |
|
| 470 |
-
#
|
| 471 |
if st.button("Predict"):
|
| 472 |
-
prediction = xgb_model.predict(input_data_encoded)
|
| 473 |
-
st.write(f"Prediction: {prediction[0]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
import plotly.express as px
|
| 8 |
from sklearn.model_selection import train_test_split
|
| 9 |
import joblib
|
| 10 |
+
import shap
|
| 11 |
from xgboost import XGBClassifier
|
| 12 |
from sklearn.preprocessing import OneHotEncoder
|
| 13 |
from streamlit_shap import st_shap
|
|
|
|
| 26 |
|
| 27 |
xgb_model, ohe = load_model_and_encoder()
|
| 28 |
|
| 29 |
+
# Create SHAP explainer
|
| 30 |
+
explainer = shap.TreeExplainer(xgb_model)
|
| 31 |
|
| 32 |
# Load the data
|
| 33 |
data = load_data()
|
|
|
|
| 447 |
st.write("Click the 'Get Recommendations' button to receive personalized financial recommendations.")
|
| 448 |
|
| 449 |
elif option == "SML Classification":
|
| 450 |
+
st.title("SML Classification - Financial Product Prediction")
|
| 451 |
|
| 452 |
+
# User inputs
|
|
|
|
| 453 |
age = st.slider("Your Age", 18, 70, 30)
|
| 454 |
income_bracket = st.selectbox("Income Bracket (1 = Lowest, 5 = Highest)", [1, 2, 3, 4, 5])
|
| 455 |
has_internet_access = st.radio("Do you have Internet Access?", ["Yes", "No"])
|
| 456 |
employed = st.radio("Are you employed?", ["Yes", "No"])
|
| 457 |
high_income_region = st.radio("Are you in a High Income Region?", ["Yes", "No"])
|
| 458 |
+
is_mobileowner = st.radio("Are you a Mobileowner?", ["Yes", "No"])
|
| 459 |
+
place_of_living = st.selectbox("Place of Living", ["Urban Area", "Rural Area", "Unknown"])
|
| 460 |
+
education_level = st.selectbox("Education Level", ["Primary", "Secondary", "Tertiary"])
|
| 461 |
+
age_group = st.selectbox("Age Group", ["Teen", "Young Adult", "Adult", "Middle Age", "Older Adult", "Elder", "Senior"])
|
| 462 |
+
|
| 463 |
+
# Prepare categorical features
|
| 464 |
input_data = pd.DataFrame({
|
|
|
|
| 465 |
'Income Bracket': [income_bracket],
|
| 466 |
'Has Internet Access': [1 if has_internet_access == "Yes" else 0],
|
| 467 |
'Employed': [1 if employed == "Yes" else 0],
|
| 468 |
+
'High Income Region': [1 if high_income_region == "Yes" else 0],
|
| 469 |
+
'Is Mobileowner': [1 if is_mobileowner == "Yes" else 0],
|
| 470 |
+
'Place of living_Unknown': [1 if place_of_living == "Unknown" else 0],
|
| 471 |
+
'Place of living_Urban Area': [1 if place_of_living == "Urban Area" else 0],
|
| 472 |
+
'Place of living_Rural Area': [1 if place_of_living == "Rural Area" else 0],
|
| 473 |
+
'Education Level_Primary': [1 if education_level == "Primary" else 0],
|
| 474 |
+
'Education Level_Secondary': [1 if education_level == "Secondary" else 0],
|
| 475 |
+
'Education Level_Tertiary': [1 if education_level == "Tertiary" else 0],
|
| 476 |
+
'Age Group_Teen': [1 if age_group == "Teen" else 0],
|
| 477 |
+
'Age Group_Young Adult': [1 if age_group == "Young Adult" else 0],
|
| 478 |
+
'Age Group_Adult': [1 if age_group == "Adult" else 0],
|
| 479 |
+
'Age Group_Middle Age': [1 if age_group == "Middle Age" else 0],
|
| 480 |
+
'Age Group_Older Adult': [1 if age_group == "Older Adult" else 0],
|
| 481 |
+
'Age Group_Elder': [1 if age_group == "Elder" else 0],
|
| 482 |
+
'Age Group_Senior': [1 if age_group == "Senior" else 0]
|
| 483 |
})
|
| 484 |
|
| 485 |
# One-hot encode the categorical features
|
| 486 |
+
input_data_encoded = pd.DataFrame(ohe.transform(input_data).todense(), columns=ohe.get_feature_names_out())
|
| 487 |
|
| 488 |
+
# Prediction
|
| 489 |
if st.button("Predict"):
|
| 490 |
+
prediction = xgb_model.predict(input_data_encoded)[0]
|
| 491 |
+
st.write(f"Prediction: {prediction}")
|
| 492 |
+
|
| 493 |
+
# SHAP explanation
|
| 494 |
+
st.subheader('Factors Influencing Prediction 🤖')
|
| 495 |
+
shap_values = explainer.shap_values(input_data_encoded)
|
| 496 |
+
st_shap(shap.force_plot(explainer.expected_value, shap_values, input_data_encoded), height=400, width=600)
|
| 497 |
+
|
| 498 |
+
st.markdown("""
|
| 499 |
+
This plot shows how each feature contributes to the prediction:
|
| 500 |
+
- Blue bars push the prediction lower
|
| 501 |
+
- Red bars push the prediction higher
|
| 502 |
+
""")
|
| 503 |
+
|
| 504 |
+
# Add other elif blocks for 'Description of Variables', 'Regional-Based Analysis', 'Income-Based Analysis', and 'Gender-Based Analysis'
|