Spaces:

sidmaji
/

loan-default-predictor

Sleeping

Siddhant Maji

updated

3286f8c 7 months ago

9.05 kB

	import pickle

	import gradio as gr
	import joblib
	import numpy as np
	import pandas as pd
	from tensorflow.keras.models import load_model

	# Load models and scaler
	log_reg = joblib.load("models/logistic_regression_model.pkl")
	xgb = pickle.load(open("models/xgboost_model.pkl", "rb"))
	ffnn = load_model("models/ffnn_model.keras")
	scaler = joblib.load("models/standard_scaler.pkl")
	pt = joblib.load("models/boxcox_transformer.pkl")

	# Master feature definition: order matters!
	features = {
	"Age": {
	"type": "numeric",
	"default": 56.0,
	"explanation": "The age of the borrower in years.",
	},
	"Income": {
	"type": "numeric",
	"default": 85994.0,
	"explanation": "The annual income of the borrower in USD.",
	},
	"LoanAmount": {
	"type": "numeric",
	"default": 50587.0,
	"explanation": "The amount of money being borrowed in USD.",
	},
	"CreditScore": {
	"type": "numeric",
	"default": 520.0,
	"explanation": "Credit score indicating borrower creditworthiness.",
	},
	"MonthsEmployed": {
	"type": "numeric",
	"default": 80.0,
	"explanation": "Months the borrower has been employed at current job.",
	},
	"NumCreditLines": {
	"type": "numeric",
	"default": 4.0,
	"explanation": "Number of active credit lines the borrower has.",
	},
	"InterestRate": {
	"type": "numeric",
	"default": 15.23,
	"explanation": "Interest rate for the loan as a percentage.",
	},
	"LoanTerm": {
	"type": "numeric",
	"default": 36.0,
	"explanation": "Duration of the loan in months.",
	},
	"DTIRatio": {
	"type": "derived",
	"explanation": "Debt-to-Income ratio (total debt / annual income).",
	},
	"Education": {
	"type": "categorical",
	"default": 0.0,
	"categories": ["Bachelor's", "High School", "Master's", "PhD"],
	"explanation": "Highest education level attained by the borrower.",
	},
	"EmploymentType": {
	"type": "categorical",
	"default": 0.0,
	"categories": ["Full-time", "Part-time", "Self-employed", "Unemployed"],
	"explanation": "Borrower's employment status.",
	},
	"MaritalStatus": {
	"type": "categorical",
	"default": 0.0,
	"categories": ["Divorced", "Married", "Single"],
	"explanation": "Borrower's marital status.",
	},
	"HasMortgage": {
	"type": "categorical",
	"default": 1.0,
	"categories": ["No", "Yes"],
	"explanation": "Whether the borrower currently has a mortgage.",
	},
	"HasDependents": {
	"type": "categorical",
	"default": 1.0,
	"categories": ["No", "Yes"],
	"explanation": "Whether the borrower has dependents.",
	},
	"LoanPurpose": {
	"type": "categorical",
	"default": 4.0,
	"categories": ["Auto", "Business", "Education", "Home", "Other"],
	"explanation": "The reason for taking out the loan.",
	},
	"HasCoSigner": {
	"type": "categorical",
	"default": 1.0,
	"categories": ["No", "Yes"],
	"explanation": "Whether there is a co-signer on the loan.",
	},
	"AffRatio": {
	"type": "derived",
	"explanation": "LoanAmount divided by Income, a financial ratio.",
	},
	"TotalInterest": {
	"type": "derived",
	"explanation": "Total interest paid: InterestRate * LoanTerm.",
	},
	"Debt": {"type": "numeric", "default": 37837.36, "explanation": "Total debt."},
	"AvgBorrowed": {
	"type": "derived",
	"explanation": "Average borrowed amount per credit line.",
	},
	}

	# Gradio input components (with refs)
	input_components = []
	component_refs = {}

	for name, meta in features.items():
	if meta["type"] == "categorical":
	dropdown = gr.Dropdown(
	label=name,
	choices=meta["categories"],
	value=meta["categories"][int(meta["default"])],
	info=meta["explanation"],
	)
	input_components.append(dropdown)
	component_refs[name] = dropdown
	elif meta["type"] == "numeric":
	number = gr.Number(label=name, value=meta["default"], info=meta["explanation"])
	input_components.append(number)
	component_refs[name] = number

	# Derived components
	input_components += [
	gr.Number(
	label="AffRatio",
	interactive=False,
	info=features["AffRatio"]["explanation"],
	value=lambda loan, income: round(loan / income, 5) if income else 0,
	inputs=[component_refs["LoanAmount"], component_refs["Income"]],
	),
	gr.Number(
	label="TotalInterest",
	interactive=False,
	info=features["TotalInterest"]["explanation"],
	value=lambda rate, term: round(rate * term, 5),
	inputs=[component_refs["InterestRate"], component_refs["LoanTerm"]],
	),
	gr.Number(
	label="DTIRatio",
	interactive=False,
	info=features["DTIRatio"]["explanation"],
	value=lambda debt, income: round(debt / income, 5) if income else 0,
	inputs=[component_refs["Debt"], component_refs["Income"]],
	),
	gr.Number(
	label="AvgBorrowed",
	interactive=False,
	info=features["AvgBorrowed"]["explanation"],
	value=lambda loan, lines: round(loan / lines, 5) if lines else 0,
	inputs=[component_refs["LoanAmount"], component_refs["NumCreditLines"]],
	),
	]


	# Inference logic
	def predict_default(*inputs):
	input_map = {}
	input_index = 0

	for name, meta in features.items():
	if meta["type"] == "derived":
	continue

	val = inputs[input_index]
	if meta["type"] == "categorical":
	val = meta["categories"].index(val)
	input_map[name] = val
	input_index += 1

	# Derived features and Box-Cox transform (same as before)
	input_map["AffRatio"] = (
	round(input_map["LoanAmount"] / input_map["Income"], 5)
	if input_map["Income"]
	else 0
	)
	input_map["TotalInterest"] = round(
	input_map["InterestRate"] * input_map["LoanTerm"], 5
	)
	input_map["DTIRatio"] = (
	round(input_map["Debt"] / input_map["Income"], 5) if input_map["Income"] else 0
	)
	input_map["AvgBorrowed"] = (
	round(input_map["LoanAmount"] / input_map["NumCreditLines"], 5)
	if input_map["NumCreditLines"]
	else 0
	)

	derived_cols = ["AffRatio", "AvgBorrowed", "TotalInterest", "Debt"]
	derived_values_df = pd.DataFrame(
	[
	[
	input_map["AffRatio"],
	input_map["AvgBorrowed"],
	input_map["TotalInterest"],
	input_map["Debt"],
	]
	],
	columns=derived_cols,
	)

	transformed_derived = pt.transform(derived_values_df).flatten()

	(
	input_map["AffRatio"],
	input_map["AvgBorrowed"],
	input_map["TotalInterest"],
	input_map["Debt"],
	) = transformed_derived

	input_row = [input_map[name] for name in features.keys()]
	input_df = pd.DataFrame([input_row], columns=list(features.keys()))
	scaled = scaler.transform(input_df)

	# Get probabilities
	probs = {
	"Logistic Regression": float(log_reg.predict_proba(scaled)[0][1]),
	"XGBoost": float(xgb.predict_proba(input_df.values)[0][1]),
	"FFNN": float(ffnn.predict(scaled, verbose=0)[0][0]),
	}

	# Binary labels using 0.5 threshold
	labels = {
	model: "Default" if p >= 0.5 else "No Default" for model, p in probs.items()
	}

	# Create markdown summary for labels
	label_md = "\n".join(
	[f"## {model}: {label}\n" for model, label in labels.items()]
	)

	# Explanatory text for the user
	explanation_md = (
	"### Prediction Explanation\n"
	"Each model predicts the probability that the borrower will default on their loan.\n"
	"- Probabilities closer to 1 indicate higher risk of default.\n"
	"- A threshold of 0.5 is used to classify 'Default' vs 'No Default'.\n"
	"- Consider the results from all models to get a comprehensive view.\n"
	"\n"
	"Please use this information as guidance and not a final decision."
	)

	# For bar chart: format data as dict with labels and values
	bar_data = pd.DataFrame(
	{
	"Model": list(probs.keys()),
	"Default Probability": list(probs.values()),
	}
	)

	return bar_data, label_md, explanation_md


	output_bar = gr.BarPlot(
	x="Model", y="Default Probability", label="Model Default Probabilities", height=250
	)
	output_labels = gr.Markdown()
	output_explanation = gr.Markdown()

	demo = gr.Interface(
	fn=predict_default,
	inputs=input_components,
	outputs=[output_bar, output_labels, output_explanation],
	title="Loan Default Risk Predictor",
	description="Enter borrower info and see the default risk prediction from 3 models.",
	flagging_mode="never",
	)

	demo.launch()