# Siddhant Maji
# updated
# 3286f8c
import pickle
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
# Load the trained models and the fitted preprocessing objects from disk.
# NOTE: pickle/joblib deserialization can execute arbitrary code, so these
# artifact files must only ever come from a trusted source.
log_reg = joblib.load("models/logistic_regression_model.pkl")
# Use a context manager so the file handle is closed as soon as unpickling
# finishes instead of being left open for the lifetime of the process.
with open("models/xgboost_model.pkl", "rb") as xgb_file:
    xgb = pickle.load(xgb_file)
ffnn = load_model("models/ffnn_model.keras")
scaler = joblib.load("models/standard_scaler.pkl")
pt = joblib.load("models/boxcox_transformer.pkl")
# Master feature table. Key order is significant: the prediction code consumes
# the non-derived entries positionally and builds the model input row by
# iterating this dict, so entries must not be reordered.
def _numeric(default, explanation):
    """Describe a numeric input together with its pre-filled value."""
    return {"type": "numeric", "default": default, "explanation": explanation}


def _categorical(default, categories, explanation):
    """Describe a dropdown input; ``default`` is an index into ``categories``."""
    return {
        "type": "categorical",
        "default": default,
        "categories": categories,
        "explanation": explanation,
    }


def _derived(explanation):
    """Describe a feature that is computed from other inputs, not typed in."""
    return {"type": "derived", "explanation": explanation}


features = {
    # --- Raw numeric inputs ---
    "Age": _numeric(56.0, "The age of the borrower in years."),
    "Income": _numeric(85994.0, "The annual income of the borrower in USD."),
    "LoanAmount": _numeric(50587.0, "The amount of money being borrowed in USD."),
    "CreditScore": _numeric(
        520.0, "Credit score indicating borrower creditworthiness."
    ),
    "MonthsEmployed": _numeric(
        80.0, "Months the borrower has been employed at current job."
    ),
    "NumCreditLines": _numeric(
        4.0, "Number of active credit lines the borrower has."
    ),
    "InterestRate": _numeric(15.23, "Interest rate for the loan as a percentage."),
    "LoanTerm": _numeric(36.0, "Duration of the loan in months."),
    "DTIRatio": _derived("Debt-to-Income ratio (total debt / annual income)."),
    # --- Categorical inputs (default is the index of the pre-selected choice) ---
    "Education": _categorical(
        0.0,
        ["Bachelor's", "High School", "Master's", "PhD"],
        "Highest education level attained by the borrower.",
    ),
    "EmploymentType": _categorical(
        0.0,
        ["Full-time", "Part-time", "Self-employed", "Unemployed"],
        "Borrower's employment status.",
    ),
    "MaritalStatus": _categorical(
        0.0,
        ["Divorced", "Married", "Single"],
        "Borrower's marital status.",
    ),
    "HasMortgage": _categorical(
        1.0,
        ["No", "Yes"],
        "Whether the borrower currently has a mortgage.",
    ),
    "HasDependents": _categorical(
        1.0,
        ["No", "Yes"],
        "Whether the borrower has dependents.",
    ),
    "LoanPurpose": _categorical(
        4.0,
        ["Auto", "Business", "Education", "Home", "Other"],
        "The reason for taking out the loan.",
    ),
    "HasCoSigner": _categorical(
        1.0,
        ["No", "Yes"],
        "Whether there is a co-signer on the loan.",
    ),
    # --- Engineered features and the raw debt figure they depend on ---
    "AffRatio": _derived("LoanAmount divided by Income, a financial ratio."),
    "TotalInterest": _derived("Total interest paid: InterestRate * LoanTerm."),
    "Debt": _numeric(37837.36, "Total debt."),
    "AvgBorrowed": _derived("Average borrowed amount per credit line."),
}
# Build one editable Gradio widget per non-derived feature, in feature order.
# ``component_refs`` maps a feature name to its widget so that other
# components can be wired to it by name.
input_components = []
component_refs = {}
for name, meta in features.items():
    kind = meta["type"]
    if kind == "categorical":
        categories = meta["categories"]
        widget = gr.Dropdown(
            label=name,
            choices=categories,
            # The stored default is an index into the category list.
            value=categories[int(meta["default"])],
            info=meta["explanation"],
        )
    elif kind == "numeric":
        widget = gr.Number(
            label=name,
            value=meta["default"],
            info=meta["explanation"],
        )
    else:
        # Any other type (i.e. "derived") gets no editable widget here.
        continue
    input_components.append(widget)
    component_refs[name] = widget
# Derived components: read-only fields whose value is recomputed by Gradio
# from the editable widgets listed in ``inputs``.
def _display_ratio(numerator, denominator):
    """Return ``numerator / denominator`` rounded to 5 places for display.

    Falls back to 0 when the denominator is zero or empty, or when the
    numerator is empty (a cleared number field arrives as None), so that
    editing a field never raises inside the UI callback.
    """
    if numerator is None or not denominator:
        return 0
    return round(numerator / denominator, 5)


def _display_product(rate, term):
    """Return ``rate * term`` rounded to 5 places, or 0 if either is empty."""
    if rate is None or term is None:
        return 0
    return round(rate * term, 5)


input_components += [
    gr.Number(
        label="AffRatio",
        interactive=False,
        info=features["AffRatio"]["explanation"],
        value=_display_ratio,
        inputs=[component_refs["LoanAmount"], component_refs["Income"]],
    ),
    gr.Number(
        label="TotalInterest",
        interactive=False,
        info=features["TotalInterest"]["explanation"],
        value=_display_product,
        inputs=[component_refs["InterestRate"], component_refs["LoanTerm"]],
    ),
    gr.Number(
        label="DTIRatio",
        interactive=False,
        info=features["DTIRatio"]["explanation"],
        value=_display_ratio,
        inputs=[component_refs["Debt"], component_refs["Income"]],
    ),
    gr.Number(
        label="AvgBorrowed",
        interactive=False,
        info=features["AvgBorrowed"]["explanation"],
        value=_display_ratio,
        inputs=[component_refs["LoanAmount"], component_refs["NumCreditLines"]],
    ),
]
# Inference logic
def predict_default(*inputs):
    """Score one borrower with the three loaded models.

    Args:
        *inputs: Widget values in the order of ``input_components``: one value
            per non-derived entry of ``features`` (in dict order). Any further
            values (the read-only derived fields) are ignored, because the
            derived features are recomputed here from the raw inputs.

    Returns:
        A tuple ``(bar_data, label_md, explanation_md)`` where ``bar_data`` is
        a DataFrame with "Model" and "Default Probability" columns,
        ``label_md`` is a Markdown string with each model's 0.5-threshold
        label, and ``explanation_md`` is static Markdown guidance text.

    Raises:
        IndexError: If fewer values are passed than there are raw features.
        gr.Error: If a field is empty, a dropdown value is unknown, or the
            derived features cannot be transformed; the message is shown to
            the user in the UI.
    """
    raw_names = [
        name for name, meta in features.items() if meta["type"] != "derived"
    ]
    if len(inputs) < len(raw_names):
        raise IndexError(
            f"expected at least {len(raw_names)} input values, got {len(inputs)}"
        )

    input_map = {}
    # zip() stops after the raw features, which skips the trailing derived
    # display values without a manual index counter.
    for name, val in zip(raw_names, inputs):
        if val is None:
            # Cleared number fields and unset dropdowns arrive as None.
            raise gr.Error(f"Please provide a value for {name}.")
        meta = features[name]
        if meta["type"] == "categorical":
            # Models consume the category's position, not its label.
            try:
                val = meta["categories"].index(val)
            except ValueError as err:
                raise gr.Error(f"Unknown value {val!r} for {name}.") from err
        input_map[name] = val

    income = input_map["Income"]
    loan_amount = input_map["LoanAmount"]
    credit_lines = input_map["NumCreditLines"]

    # Derived features; a zero denominator falls back to 0 instead of raising.
    input_map["AffRatio"] = round(loan_amount / income, 5) if income else 0
    input_map["TotalInterest"] = round(
        input_map["InterestRate"] * input_map["LoanTerm"], 5
    )
    # DTIRatio must be computed from the raw Debt, i.e. before Debt is
    # replaced by its transformed value below.
    input_map["DTIRatio"] = round(input_map["Debt"] / income, 5) if income else 0
    input_map["AvgBorrowed"] = (
        round(loan_amount / credit_lines, 5) if credit_lines else 0
    )

    # These four columns are passed through the fitted transformer together,
    # in this column order.
    boxcox_cols = ["AffRatio", "AvgBorrowed", "TotalInterest", "Debt"]
    boxcox_df = pd.DataFrame(
        [[input_map[col] for col in boxcox_cols]],
        columns=boxcox_cols,
    )
    try:
        transformed = pt.transform(boxcox_df).flatten()
    except ValueError as err:
        # NOTE(review): presumably ``pt`` is a Box-Cox transformer, which
        # rejects non-positive values such as the 0 fallbacks above - confirm.
        raise gr.Error(f"Could not transform the derived features: {err}") from err
    input_map.update(zip(boxcox_cols, transformed))

    # Assemble the single-row frame in master feature order.
    feature_names = list(features)
    input_df = pd.DataFrame(
        [[input_map[name] for name in feature_names]],
        columns=feature_names,
    )
    scaled = scaler.transform(input_df)

    # Get probabilities (XGBoost is fed the unscaled values, the others the
    # scaled ones).
    probs = {
        "Logistic Regression": float(log_reg.predict_proba(scaled)[0][1]),
        "XGBoost": float(xgb.predict_proba(input_df.values)[0][1]),
        "FFNN": float(ffnn.predict(scaled, verbose=0)[0][0]),
    }

    # Binary labels using 0.5 threshold
    labels = {
        model: "Default" if p >= 0.5 else "No Default" for model, p in probs.items()
    }

    # Create markdown summary for labels
    label_md = "\n".join(
        f"## {model}: *{label}*\n" for model, label in labels.items()
    )

    # Explanatory text for the user
    explanation_md = (
        "### Prediction Explanation\n"
        "Each model predicts the probability that the borrower will default on their loan.\n"
        "- Probabilities closer to 1 indicate higher risk of default.\n"
        "- A threshold of 0.5 is used to classify 'Default' vs 'No Default'.\n"
        "- Consider the results from all models to get a comprehensive view.\n"
        "\n"
        "Please use this information as guidance and not a final decision."
    )

    # Bar chart data: one row per model.
    bar_data = pd.DataFrame(
        {
            "Model": list(probs.keys()),
            "Default Probability": list(probs.values()),
        }
    )
    return bar_data, label_md, explanation_md
# Output widgets: a bar chart of per-model probabilities followed by two
# Markdown panels (per-model labels, then the explanatory text).
output_bar = gr.BarPlot(
    label="Model Default Probabilities",
    x="Model",
    y="Default Probability",
    height=250,
)
output_labels = gr.Markdown()
output_explanation = gr.Markdown()

# Wire the inputs, the prediction function and the outputs into a single
# interface, then start serving it.
demo = gr.Interface(
    title="Loan Default Risk Predictor",
    description="Enter borrower info and see the default risk prediction from 3 models.",
    fn=predict_default,
    inputs=input_components,
    outputs=[output_bar, output_labels, output_explanation],
    flagging_mode="never",
)
demo.launch()