Siddhant Maji committed on
Commit
3286f8c
·
1 Parent(s): e95329b
app.py CHANGED
@@ -6,88 +6,276 @@ import numpy as np
6
  import pandas as pd
7
  from tensorflow.keras.models import load_model
8
 
9
- # Load models
10
  log_reg = joblib.load("models/logistic_regression_model.pkl")
11
  xgb = pickle.load(open("models/xgboost_model.pkl", "rb"))
12
  ffnn = load_model("models/ffnn_model.keras")
13
  scaler = joblib.load("models/standard_scaler.pkl")
 
14
 
15
- import json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- with open("data/feature_names.json", "r") as f:
18
- feature_names = json.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
 
 
21
  def predict_default(*inputs):
22
- processed_inputs = []
23
- for name, val in zip(feature_names, inputs):
24
- if name in categorical_mappings:
25
- val = categorical_mappings[name].index(val) # Convert string to int
26
- processed_inputs.append(val)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- input_df = pd.DataFrame([processed_inputs], columns=feature_names)
 
 
 
 
 
 
 
 
 
 
29
  scaled = scaler.transform(input_df)
30
 
31
- logit = log_reg.predict_proba(scaled)[0][1]
32
- xgb_pred = xgb.predict_proba(input_df.values)[0][1]
33
- ffnn_pred = ffnn.predict(scaled)[0][0]
 
 
 
34
 
35
- return {
36
- "Logistic Regression": float(logit),
37
- "XGBoost": float(xgb_pred),
38
- "FFNN": float(ffnn_pred),
39
  }
40
 
 
 
 
 
41
 
42
- default_values = [
43
- 56.0, # Age
44
- 85994.0, # Income
45
- 50587.0, # LoanAmount
46
- 520.0, # CreditScore
47
- 80.0, # MonthsEmployed
48
- 4.0, # NumCreditLines
49
- 15.23, # InterestRate
50
- 36.0, # LoanTerm
51
- 0.44, # DTIRatio
52
- 0.0, # Education
53
- 0.0, # EmploymentType
54
- 0.0, # MaritalStatus
55
- 1.0, # HasMortgage
56
- 1.0, # HasDependents
57
- 4.0, # LoanPurpose
58
- 1.0, # HasCoSigner
59
- -0.895272, # AffRatio
60
- 0.431883, # TotalInterest
61
- 0.139637, # Debt
62
- -1.28165, # AvgBorrowed
63
- ]
64
 
65
- categorical_mappings = {
66
- "Education": ["Bachelor's", "High School", "Master's", "PhD"],
67
- "EmploymentType": ["Full-time", "Part-time", "Self-employed", "Unemployed"],
68
- "MaritalStatus": ["Divorced", "Married", "Single"],
69
- "HasMortgage": ["No", "Yes"],
70
- "HasDependents": ["No", "Yes"],
71
- "LoanPurpose": ["Auto", "Business", "Education", "Home", "Other"],
72
- "HasCoSigner": ["No", "Yes"],
73
- }
74
 
 
75
 
76
- input_components = []
77
- for name, val in zip(feature_names, default_values):
78
- if name in categorical_mappings:
79
- choices = categorical_mappings[name]
80
- input_components.append(
81
- gr.Dropdown(label=name, choices=choices, value=choices[int(val)])
82
- )
83
- else:
84
- input_components.append(gr.Number(label=name, value=val))
85
- output_components = gr.Label(num_top_classes=3)
86
 
87
  demo = gr.Interface(
88
  fn=predict_default,
89
  inputs=input_components,
90
- outputs=output_components,
91
  title="Loan Default Risk Predictor",
92
  description="Enter borrower info and see the default risk prediction from 3 models.",
93
  flagging_mode="never",
 
6
  import pandas as pd
7
  from tensorflow.keras.models import load_model
8
 
9
# Load the trained models and preprocessing artifacts once, at import time.
# NOTE: pickle/joblib files can execute arbitrary code while loading; only
# load artifacts that were produced by a trusted training pipeline.
log_reg = joblib.load("models/logistic_regression_model.pkl")
# Use a context manager so the file handle is closed deterministically
# instead of being left open until garbage collection.
with open("models/xgboost_model.pkl", "rb") as xgb_file:
    xgb = pickle.load(xgb_file)
ffnn = load_model("models/ffnn_model.keras")
scaler = joblib.load("models/standard_scaler.pkl")
# Transformer applied to the engineered features before scaling.
pt = joblib.load("models/boxcox_transformer.pkl")
15
 
16
# Master feature registry. Insertion order is significant: the non-derived
# input widgets and the model input columns are both built by iterating it.
def _numeric(default, explanation):
    """Describe a free-form numeric input feature."""
    return {"type": "numeric", "default": default, "explanation": explanation}


def _categorical(default, categories, explanation):
    """Describe a dropdown feature; *default* is an index into *categories*."""
    return {
        "type": "categorical",
        "default": default,
        "categories": categories,
        "explanation": explanation,
    }


def _derived(explanation):
    """Describe a feature that is computed from other inputs (no default)."""
    return {"type": "derived", "explanation": explanation}


features = {
    "Age": _numeric(56.0, "The age of the borrower in years."),
    "Income": _numeric(85994.0, "The annual income of the borrower in USD."),
    "LoanAmount": _numeric(50587.0, "The amount of money being borrowed in USD."),
    "CreditScore": _numeric(
        520.0, "Credit score indicating borrower creditworthiness."
    ),
    "MonthsEmployed": _numeric(
        80.0, "Months the borrower has been employed at current job."
    ),
    "NumCreditLines": _numeric(
        4.0, "Number of active credit lines the borrower has."
    ),
    "InterestRate": _numeric(15.23, "Interest rate for the loan as a percentage."),
    "LoanTerm": _numeric(36.0, "Duration of the loan in months."),
    "DTIRatio": _derived("Debt-to-Income ratio (total debt / annual income)."),
    "Education": _categorical(
        0.0,
        ["Bachelor's", "High School", "Master's", "PhD"],
        "Highest education level attained by the borrower.",
    ),
    "EmploymentType": _categorical(
        0.0,
        ["Full-time", "Part-time", "Self-employed", "Unemployed"],
        "Borrower's employment status.",
    ),
    "MaritalStatus": _categorical(
        0.0,
        ["Divorced", "Married", "Single"],
        "Borrower's marital status.",
    ),
    "HasMortgage": _categorical(
        1.0,
        ["No", "Yes"],
        "Whether the borrower currently has a mortgage.",
    ),
    "HasDependents": _categorical(
        1.0,
        ["No", "Yes"],
        "Whether the borrower has dependents.",
    ),
    "LoanPurpose": _categorical(
        4.0,
        ["Auto", "Business", "Education", "Home", "Other"],
        "The reason for taking out the loan.",
    ),
    "HasCoSigner": _categorical(
        1.0,
        ["No", "Yes"],
        "Whether there is a co-signer on the loan.",
    ),
    "AffRatio": _derived("LoanAmount divided by Income, a financial ratio."),
    "TotalInterest": _derived("Total interest paid: InterestRate * LoanTerm."),
    "Debt": _numeric(37837.36, "Total debt."),
    "AvgBorrowed": _derived("Average borrowed amount per credit line."),
}
118
+
119
# Build one editable Gradio widget per categorical/numeric feature, in
# registry order. ``component_refs`` keeps a name -> widget lookup so other
# widgets can be wired to these inputs.
input_components = []
component_refs = {}

# Widget factory per feature type; types without an entry are skipped below.
_WIDGET_BUILDERS = {
    "categorical": lambda label, spec: gr.Dropdown(
        label=label,
        choices=spec["categories"],
        # The stored default is a (float) index into the category list.
        value=spec["categories"][int(spec["default"])],
        info=spec["explanation"],
    ),
    "numeric": lambda label, spec: gr.Number(
        label=label,
        value=spec["default"],
        info=spec["explanation"],
    ),
}

for feature_name, feature_spec in features.items():
    build_widget = _WIDGET_BUILDERS.get(feature_spec["type"])
    if build_widget is None:
        continue
    widget = build_widget(feature_name, feature_spec)
    input_components.append(widget)
    component_refs[feature_name] = widget
137
 
138
# Derived components: read-only widgets whose value is recomputed from the
# editable inputs listed in ``inputs``. They are display-only; the prediction
# function recomputes the same quantities from the raw inputs.
def _ratio_or_zero(numerator, denominator):
    """Return numerator / denominator rounded to 5 places, or 0 if undefined.

    A cleared numeric field arrives as ``None``; treat that like a zero
    denominator instead of raising ``TypeError`` inside the UI callback.
    """
    if numerator is None or not denominator:
        return 0
    return round(numerator / denominator, 5)


def _product_or_zero(left, right):
    """Return left * right rounded to 5 places, or 0 if either is missing."""
    if left is None or right is None:
        return 0
    return round(left * right, 5)


input_components += [
    gr.Number(
        label="AffRatio",
        interactive=False,
        info=features["AffRatio"]["explanation"],
        value=_ratio_or_zero,
        inputs=[component_refs["LoanAmount"], component_refs["Income"]],
    ),
    gr.Number(
        label="TotalInterest",
        interactive=False,
        info=features["TotalInterest"]["explanation"],
        value=_product_or_zero,
        inputs=[component_refs["InterestRate"], component_refs["LoanTerm"]],
    ),
    gr.Number(
        label="DTIRatio",
        interactive=False,
        info=features["DTIRatio"]["explanation"],
        value=_ratio_or_zero,
        inputs=[component_refs["Debt"], component_refs["Income"]],
    ),
    gr.Number(
        label="AvgBorrowed",
        interactive=False,
        info=features["AvgBorrowed"]["explanation"],
        value=_ratio_or_zero,
        inputs=[component_refs["LoanAmount"], component_refs["NumCreditLines"]],
    ),
]
169
 
170
 
171
# Inference logic
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator rounded to 5 places, or 0 if the denominator is 0."""
    return round(numerator / denominator, 5) if denominator else 0


def _encode_input(name, meta, val):
    """Validate one raw UI value and return the numeric value the models use."""
    if meta["type"] == "categorical":
        try:
            # Categories are encoded as their position in the category list.
            return meta["categories"].index(val)
        except ValueError:
            raise gr.Error(f"Unknown option for {name}: {val!r}.") from None
    if val is None:
        # A cleared number field is delivered as None; report it clearly
        # rather than failing later with a TypeError during arithmetic.
        raise gr.Error(f"Please provide a value for {name}.")
    return val


def predict_default(*inputs):
    """Predict loan-default probability with all three models.

    Args:
        *inputs: UI values in the order of ``input_components``. Only the
            leading values (one per non-derived feature, in ``features``
            order) are read; any trailing derived values are ignored and
            recomputed here.

    Returns:
        A tuple ``(bar_data, label_md, explanation_md)``: a DataFrame with
        columns "Model" and "Default Probability", a Markdown string with
        each model's Default / No Default label (0.5 threshold), and a
        static Markdown explanation.

    Raises:
        gr.Error: If a numeric field is empty or a categorical value is not
            one of the feature's categories.
        IndexError: If fewer values are passed than there are non-derived
            features.
    """
    raw_names = [
        name for name, meta in features.items() if meta["type"] != "derived"
    ]
    input_map = {
        name: _encode_input(name, features[name], inputs[position])
        for position, name in enumerate(raw_names)
    }

    # Engineered features, computed from the raw (untransformed) inputs.
    income = input_map["Income"]
    input_map["AffRatio"] = _safe_ratio(input_map["LoanAmount"], income)
    input_map["TotalInterest"] = round(
        input_map["InterestRate"] * input_map["LoanTerm"], 5
    )
    input_map["DTIRatio"] = _safe_ratio(input_map["Debt"], income)
    input_map["AvgBorrowed"] = _safe_ratio(
        input_map["LoanAmount"], input_map["NumCreditLines"]
    )

    # These four columns go through the fitted transformer ``pt``, in this
    # exact column order, and the transformed values replace the raw ones.
    # NOTE(review): if ``pt`` is a Box-Cox transformer (as its file name
    # suggests), zero or negative values here (e.g. Income == 0 giving
    # AffRatio == 0) would be rejected - confirm and validate upstream.
    derived_cols = ["AffRatio", "AvgBorrowed", "TotalInterest", "Debt"]
    derived_values_df = pd.DataFrame(
        [[input_map[col] for col in derived_cols]],
        columns=derived_cols,
    )
    transformed_derived = pt.transform(derived_values_df).flatten()
    input_map.update(zip(derived_cols, transformed_derived))

    # Assemble the model input row in registry order.
    feature_order = list(features)
    input_df = pd.DataFrame(
        [[input_map[name] for name in feature_order]], columns=feature_order
    )
    scaled = scaler.transform(input_df)

    # Get probabilities. XGBoost receives the unscaled values; the other two
    # models receive the scaled ones.
    probs = {
        "Logistic Regression": float(log_reg.predict_proba(scaled)[0][1]),
        "XGBoost": float(xgb.predict_proba(input_df.values)[0][1]),
        "FFNN": float(ffnn.predict(scaled, verbose=0)[0][0]),
    }

    # Binary labels using 0.5 threshold
    labels = {
        model: "Default" if p >= 0.5 else "No Default" for model, p in probs.items()
    }

    # Create markdown summary for labels
    label_md = "\n".join(
        f"## {model}: *{label}*\n" for model, label in labels.items()
    )

    # Explanatory text for the user
    explanation_md = (
        "### Prediction Explanation\n"
        "Each model predicts the probability that the borrower will default on their loan.\n"
        "- Probabilities closer to 1 indicate higher risk of default.\n"
        "- A threshold of 0.5 is used to classify 'Default' vs 'No Default'.\n"
        "- Consider the results from all models to get a comprehensive view.\n"
        "\n"
        "Please use this information as guidance and not a final decision."
    )

    # Bar chart data: one row per model with its default probability.
    bar_data = pd.DataFrame(
        {
            "Model": list(probs.keys()),
            "Default Probability": list(probs.values()),
        }
    )

    return bar_data, label_md, explanation_md
267
 
268
+
269
# Output widgets: a bar chart of per-model probabilities followed by two
# Markdown panes (per-model labels, then the explanatory text).
output_bar = gr.BarPlot(
    x="Model",
    y="Default Probability",
    label="Model Default Probabilities",
    height=250,
)
output_labels, output_explanation = gr.Markdown(), gr.Markdown()
 
 
 
 
274
 
275
  demo = gr.Interface(
276
  fn=predict_default,
277
  inputs=input_components,
278
+ outputs=[output_bar, output_labels, output_explanation],
279
  title="Loan Default Risk Predictor",
280
  description="Enter borrower info and see the default risk prediction from 3 models.",
281
  flagging_mode="never",
models/boxcox_transformer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0af89d68933bda5628d46608cc9db072bcffe83ad33004c335cbedfb70494f17
3
+ size 1245
models/ffnn_model.keras CHANGED
Binary files a/models/ffnn_model.keras and b/models/ffnn_model.keras differ
 
models/xgboost_model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d546ba56110501c951797c7758c516ab7f4523e2ee43778f1590eb081155de89
3
  size 177107
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9c79b7401355a2c2c05ca0e286975930ee934f5d583583d9ccf494a50c3ac38
3
  size 177107
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  gradio==5.41.0
2
  joblib==1.5.1
3
- numpy==2.2.6
4
- pandas==2.3.1
5
- tensorflow==2.20.0rc0
 
1
  gradio==5.41.0
2
  joblib==1.5.1
3
+ numpy==2.0.2
4
+ pandas==2.2.2
5
+ tensorflow==2.19.0