farahabdou committed on
Commit
313fedb
·
verified ·
1 Parent(s): 553284a

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +181 -0
  2. scaler.joblib +3 -0
  3. selected_features.json +30 -0
  4. shap_background.joblib +3 -0
  5. xgb_model.joblib +3 -0
app.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Loan Default Prediction — Streamlit App
3
+ Deployed on Hugging Face Spaces
4
+ """
5
+
6
+ import streamlit as st
7
+ import pandas as pd
8
+ import numpy as np
9
+ import joblib
10
+ import json
11
+ import shap
12
+ import matplotlib.pyplot as plt
13
+
# ─── Page Config ───
# Browser-tab title, tab icon and wide page layout for the app.
st.set_page_config(
    page_title="Loan Default Prediction",
    # Bank emoji; the previous value was the same character after its UTF-8
    # bytes had been decoded with the wrong code page.
    page_icon="🏦",
    layout="wide",
)
20
+
# ─── Load Artifacts ───
@st.cache_resource
def load_artifacts():
    """Load the trained model and its companion artifacts from the working directory.

    The function is wrapped in ``st.cache_resource`` so that the loaded
    objects can be reused instead of being re-read from disk each time the
    script body runs.

    Returns:
        tuple: ``(model, scaler, features, background, explainer)`` where
        ``features`` is the list parsed from ``selected_features.json`` and
        ``explainer`` is a ``shap.TreeExplainer`` built from ``model``.
    """
    # NOTE: joblib.load unpickles arbitrary Python objects. Only ship
    # artifacts that come from a trusted training run.
    model = joblib.load("xgb_model.joblib")
    scaler = joblib.load("scaler.joblib")
    # JSON text is UTF-8; pass the encoding explicitly so the result does not
    # depend on the host's locale settings.
    with open("selected_features.json", encoding="utf-8") as f:
        features = json.load(f)
    # The background sample is returned to the caller but is not handed to
    # the explainer constructed below.
    background = joblib.load("shap_background.joblib")
    explainer = shap.TreeExplainer(model)
    return model, scaler, features, background, explainer


model, scaler, feature_names, background, explainer = load_artifacts()
33
+
# ─── Title ───
# The emoji in the title was mojibake (UTF-8 decoded with the wrong code
# page); it is restored to the bank emoji here.
st.title("🏦 Loan Default Prediction")
st.markdown("""
This app predicts whether a loan applicant is likely to **default** or be **approved**,
using an XGBoost model trained on 45,000 loan records. It also provides a SHAP-based
explanation of each prediction.
""")

st.divider()
43
+
# ─── Sidebar Inputs ───
# NOTE(review): the header emoji was mojibake with its final byte lost; the
# memo emoji is the most plausible original — confirm against the upstream file.
st.sidebar.header("📝 Applicant Information")

# Numeric applicant/loan attributes. Positional arguments after the label
# are minimum, maximum and initial value.
person_age = st.sidebar.slider("Age", 18, 80, 30)
person_income = st.sidebar.number_input("Annual Income ($)", 8000, 500000, 50000, step=1000)
person_emp_exp = st.sidebar.slider("Employment Experience (years)", 0, 60, 5)
loan_amnt = st.sidebar.number_input("Loan Amount ($)", 500, 100000, 10000, step=500)
loan_int_rate = st.sidebar.slider("Loan Interest Rate (%)", 2.0, 25.0, 10.0, step=0.1)
loan_percent_income = st.sidebar.slider("Loan as % of Income", 0.0, 1.0, 0.2, step=0.01)
cb_person_cred_hist_length = st.sidebar.slider("Credit History Length (years)", 1.0, 30.0, 5.0, step=0.5)
credit_score = st.sidebar.slider("Credit Score", 300, 850, 650)

# Categorical attributes. The option strings are compared verbatim when the
# one-hot columns are built, so they must not be reworded.
previous_loan_defaults = st.sidebar.selectbox("Previous Loan Defaults?", ["No", "Yes"])
person_education = st.sidebar.selectbox(
    "Education Level",
    ["High School", "Associate", "Bachelor", "Master", "Doctorate"],
)
person_home_ownership = st.sidebar.selectbox(
    "Home Ownership",
    ["RENT", "OWN", "MORTGAGE", "OTHER"],
)
loan_intent = st.sidebar.selectbox(
    "Loan Intent",
    ["PERSONAL", "EDUCATION", "MEDICAL", "VENTURE", "HOMEIMPROVEMENT", "DEBTCONSOLIDATION"],
)
60
+
# ─── Feature Engineering ───
# Ratio of requested amount to income; falls back to 0 for a non-positive income.
if person_income > 0:
    debt_to_income_ratio = loan_amnt / person_income
else:
    debt_to_income_ratio = 0

# Age group: the first bracket whose inclusive upper bound covers the age,
# otherwise "Senior".
age_brackets = ((25, "Young"), (35, "Adult"), (50, "Middle_Age"))
age_group = next(
    (label for upper, label in age_brackets if person_age <= upper),
    "Senior",
)

# Income category: same lookup scheme, with "Very_High" above the last bound.
income_brackets = ((30000, "Low"), (60000, "Medium"), (100000, "High"))
income_cat = next(
    (label for upper, label in income_brackets if person_income <= upper),
    "Very_High",
)
83
+
# ─── Build Feature Vector ───
# The column order the model sees is fixed by the feature_names selection
# further down; the grouping of keys here is only for readability.
numeric_part = {
    'person_age': person_age,
    'person_income': person_income,
    'person_emp_exp': person_emp_exp,
    'loan_amnt': loan_amnt,
    'loan_int_rate': loan_int_rate,
    'loan_percent_income': loan_percent_income,
    'cb_person_cred_hist_length': cb_person_cred_hist_length,
    'credit_score': credit_score,
    'previous_loan_defaults_on_file': int(previous_loan_defaults == "Yes"),
    'debt_to_income_ratio': debt_to_income_ratio,
}

# One-hot: person_education (dropped reference level: Associate)
education_part = {
    'person_education_Bachelor': int(person_education == "Bachelor"),
    'person_education_Doctorate': int(person_education == "Doctorate"),
    'person_education_High School': int(person_education == "High School"),
    'person_education_Master': int(person_education == "Master"),
}

# One-hot: home_ownership (dropped reference level: MORTGAGE)
ownership_part = {
    'person_home_ownership_OTHER': int(person_home_ownership == "OTHER"),
    'person_home_ownership_OWN': int(person_home_ownership == "OWN"),
    'person_home_ownership_RENT': int(person_home_ownership == "RENT"),
}

# One-hot: loan_intent (dropped reference level: DEBTCONSOLIDATION)
intent_part = {
    'loan_intent_EDUCATION': int(loan_intent == "EDUCATION"),
    'loan_intent_HOMEIMPROVEMENT': int(loan_intent == "HOMEIMPROVEMENT"),
    'loan_intent_MEDICAL': int(loan_intent == "MEDICAL"),
    'loan_intent_PERSONAL': int(loan_intent == "PERSONAL"),
    'loan_intent_VENTURE': int(loan_intent == "VENTURE"),
}

# One-hot: age_group (dropped reference level: Young)
age_part = {
    'age_group_Adult': int(age_group == "Adult"),
    'age_group_Middle_Age': int(age_group == "Middle_Age"),
    'age_group_Senior': int(age_group == "Senior"),
}

# One-hot: income_category (dropped reference level: Low)
income_part = {
    'income_category_Medium': int(income_cat == "Medium"),
    'income_category_High': int(income_cat == "High"),
    'income_category_Very_High': int(income_cat == "Very_High"),
}

input_dict = {
    **numeric_part,
    **education_part,
    **ownership_part,
    **intent_part,
    **age_part,
    **income_part,
}

# Single-row frame, with columns selected (and thereby ordered) by feature_names.
single_row = pd.DataFrame([input_dict])
input_df = single_row[feature_names]

# Scale, then re-wrap the transformed values so the column names are kept.
scaled_values = scaler.transform(input_df)
input_scaled = pd.DataFrame(scaled_values, columns=feature_names)
126
+
# ─── Predict ───
# The prediction, SHAP plot and contribution table are rendered only on the
# run in which the sidebar button was clicked; otherwise a hint is shown.
# NOTE(review): the button emoji was mojibake with its final byte lost; the
# magnifying glass is the most plausible original — confirm upstream.
if st.sidebar.button("🔍 Predict", type="primary", use_container_width=True):
    prediction = model.predict(input_scaled)[0]
    # probability[0] is displayed as "approval" and probability[1] as "default".
    # NOTE(review): this assumes class 1 was the default label at training
    # time — confirm against the training code.
    probability = model.predict_proba(input_scaled)[0]

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Prediction Result")
        if prediction == 1:
            st.error(f"⚠️ **LOAN DEFAULT** — Probability: {probability[1]*100:.1f}%")
        else:
            st.success(f"✅ **LOAN APPROVED** — Probability: {probability[0]*100:.1f}%")

        st.metric("Default Probability", f"{probability[1]*100:.1f}%")
        st.metric("Approval Probability", f"{probability[0]*100:.1f}%")

    with col2:
        st.subheader("SHAP Explanation")
        # A single row is explained, so shap_values[0] is that row's
        # per-feature contribution vector.
        shap_values = explainer.shap_values(input_scaled)

        shap_explanation = shap.Explanation(
            values=shap_values[0],
            base_values=explainer.expected_value,
            data=input_scaled.iloc[0].values,
            feature_names=feature_names,
        )

        # This relies on waterfall(show=False) drawing onto the current
        # pyplot figure, i.e. the one created on the line before it.
        fig, _ = plt.subplots(figsize=(8, 6))
        shap.plots.waterfall(shap_explanation, show=False)
        st.pyplot(fig)
        # Close this specific figure so figures do not pile up across reruns.
        plt.close(fig)

    def _direction(shap_value):
        """Return the direction label for one SHAP contribution."""
        if shap_value > 0:
            return '↑ Increases Default Risk'
        if shap_value < 0:
            return '↓ Decreases Default Risk'
        # An exact zero pushes the prediction in neither direction.
        return '– No Effect'

    # Feature contributions table: unscaled inputs shown next to their SHAP
    # values, ordered by absolute contribution.
    st.subheader("Feature Contributions")
    contrib_df = pd.DataFrame({
        'Feature': feature_names,
        'Input Value': input_df.iloc[0].values,
        'SHAP Value': shap_values[0],
    }).sort_values('SHAP Value', key=abs, ascending=False)
    contrib_df['Direction'] = contrib_df['SHAP Value'].apply(_direction)
    st.dataframe(contrib_df, use_container_width=True, hide_index=True)

else:
    st.info("👈 Fill in the applicant details in the sidebar and click **Predict**.")
174
+
# ─── Footer ───
# Static summary of the model and the explainability method, shown below a rule.
st.divider()
footer_markdown = """
**Model:** XGBoost (200 estimators, max_depth=6) | **Accuracy:** 92.78% | **ROC-AUC:** 0.9757

**Explainability:** SHAP (TreeExplainer) for post-hoc explanations of the black-box model.
"""
st.markdown(footer_markdown)
scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f58c0d8c36dbcab46a42b4b9ea358d4af3a16672e18e37fb9bc2ac6b85c370ee
3
+ size 2199
selected_features.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "person_age",
3
+ "person_income",
4
+ "person_emp_exp",
5
+ "loan_amnt",
6
+ "loan_int_rate",
7
+ "loan_percent_income",
8
+ "cb_person_cred_hist_length",
9
+ "credit_score",
10
+ "previous_loan_defaults_on_file",
11
+ "debt_to_income_ratio",
12
+ "person_education_Bachelor",
13
+ "person_education_Doctorate",
14
+ "person_education_High School",
15
+ "person_education_Master",
16
+ "person_home_ownership_OTHER",
17
+ "person_home_ownership_OWN",
18
+ "person_home_ownership_RENT",
19
+ "loan_intent_EDUCATION",
20
+ "loan_intent_HOMEIMPROVEMENT",
21
+ "loan_intent_MEDICAL",
22
+ "loan_intent_PERSONAL",
23
+ "loan_intent_VENTURE",
24
+ "age_group_Adult",
25
+ "age_group_Middle_Age",
26
+ "age_group_Senior",
27
+ "income_category_Medium",
28
+ "income_category_High",
29
+ "income_category_Very_High"
30
+ ]
shap_background.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5394e9082e06f2e94d26041a1399aed5ad9fe12b88a5abdd11f7350911d077e
3
+ size 26923
xgb_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ed6d104b4f07870431e60012354a7218b6e694d2522dc475c71c0fe1fbe7c37
3
+ size 597784