Spaces:

sidmaji
/

loan-default-predictor

Sleeping

App Files Files Community

Siddhant Maji committed on Aug 6, 2025

Commit

3286f8c

1 Parent(s): e95329b

updated

Browse files

Files changed (5) hide show

app.py +247 -59
models/boxcox_transformer.pkl +3 -0
models/ffnn_model.keras +0 -0
models/xgboost_model.pkl +1 -1
requirements.txt +3 -3

app.py CHANGED Viewed

@@ -6,88 +6,276 @@ import numpy as np
 import pandas as pd
 from tensorflow.keras.models import load_model
-# Load models
 log_reg = joblib.load("models/logistic_regression_model.pkl")
 xgb = pickle.load(open("models/xgboost_model.pkl", "rb"))
 ffnn = load_model("models/ffnn_model.keras")
 scaler = joblib.load("models/standard_scaler.pkl")
-import json
-with open("data/feature_names.json", "r") as f:
-    feature_names = json.load(f)
 def predict_default(*inputs):
-    processed_inputs = []
-    for name, val in zip(feature_names, inputs):
-        if name in categorical_mappings:
-            val = categorical_mappings[name].index(val)  # Convert string to int
-        processed_inputs.append(val)
-    input_df = pd.DataFrame([processed_inputs], columns=feature_names)
     scaled = scaler.transform(input_df)
-    logit = log_reg.predict_proba(scaled)[0][1]
-    xgb_pred = xgb.predict_proba(input_df.values)[0][1]
-    ffnn_pred = ffnn.predict(scaled)[0][0]
-    return {
-        "Logistic Regression": float(logit),
-        "XGBoost": float(xgb_pred),
-        "FFNN": float(ffnn_pred),
     }
-default_values = [
-    56.0,  # Age
-    85994.0,  # Income
-    50587.0,  # LoanAmount
-    520.0,  # CreditScore
-    80.0,  # MonthsEmployed
-    4.0,  # NumCreditLines
-    15.23,  # InterestRate
-    36.0,  # LoanTerm
-    0.44,  # DTIRatio
-    0.0,  # Education
-    0.0,  # EmploymentType
-    0.0,  # MaritalStatus
-    1.0,  # HasMortgage
-    1.0,  # HasDependents
-    4.0,  # LoanPurpose
-    1.0,  # HasCoSigner
-    -0.895272,  # AffRatio
-    0.431883,  # TotalInterest
-    0.139637,  # Debt
-    -1.28165,  # AvgBorrowed
-]
-categorical_mappings = {
-    "Education": ["Bachelor's", "High School", "Master's", "PhD"],
-    "EmploymentType": ["Full-time", "Part-time", "Self-employed", "Unemployed"],
-    "MaritalStatus": ["Divorced", "Married", "Single"],
-    "HasMortgage": ["No", "Yes"],
-    "HasDependents": ["No", "Yes"],
-    "LoanPurpose": ["Auto", "Business", "Education", "Home", "Other"],
-    "HasCoSigner": ["No", "Yes"],
-}
-input_components = []
-for name, val in zip(feature_names, default_values):
-    if name in categorical_mappings:
-        choices = categorical_mappings[name]
-        input_components.append(
-            gr.Dropdown(label=name, choices=choices, value=choices[int(val)])
-        )
-    else:
-        input_components.append(gr.Number(label=name, value=val))
-output_components = gr.Label(num_top_classes=3)
 demo = gr.Interface(
     fn=predict_default,
     inputs=input_components,
-    outputs=output_components,
     title="Loan Default Risk Predictor",
     description="Enter borrower info and see the default risk prediction from 3 models.",
     flagging_mode="never",

 import pandas as pd
 from tensorflow.keras.models import load_model
+# Load models and scaler
 log_reg = joblib.load("models/logistic_regression_model.pkl")
 xgb = pickle.load(open("models/xgboost_model.pkl", "rb"))
 ffnn = load_model("models/ffnn_model.keras")
 scaler = joblib.load("models/standard_scaler.pkl")
+pt = joblib.load("models/boxcox_transformer.pkl")
+# Master feature definition: order matters!
+features = {
+    "Age": {
+        "type": "numeric",
+        "default": 56.0,
+        "explanation": "The age of the borrower in years.",
+    },
+    "Income": {
+        "type": "numeric",
+        "default": 85994.0,
+        "explanation": "The annual income of the borrower in USD.",
+    },
+    "LoanAmount": {
+        "type": "numeric",
+        "default": 50587.0,
+        "explanation": "The amount of money being borrowed in USD.",
+    },
+    "CreditScore": {
+        "type": "numeric",
+        "default": 520.0,
+        "explanation": "Credit score indicating borrower creditworthiness.",
+    },
+    "MonthsEmployed": {
+        "type": "numeric",
+        "default": 80.0,
+        "explanation": "Months the borrower has been employed at current job.",
+    },
+    "NumCreditLines": {
+        "type": "numeric",
+        "default": 4.0,
+        "explanation": "Number of active credit lines the borrower has.",
+    },
+    "InterestRate": {
+        "type": "numeric",
+        "default": 15.23,
+        "explanation": "Interest rate for the loan as a percentage.",
+    },
+    "LoanTerm": {
+        "type": "numeric",
+        "default": 36.0,
+        "explanation": "Duration of the loan in months.",
+    },
+    "DTIRatio": {
+        "type": "derived",
+        "explanation": "Debt-to-Income ratio (total debt / annual income).",
+    },
+    "Education": {
+        "type": "categorical",
+        "default": 0.0,
+        "categories": ["Bachelor's", "High School", "Master's", "PhD"],
+        "explanation": "Highest education level attained by the borrower.",
+    },
+    "EmploymentType": {
+        "type": "categorical",
+        "default": 0.0,
+        "categories": ["Full-time", "Part-time", "Self-employed", "Unemployed"],
+        "explanation": "Borrower's employment status.",
+    },
+    "MaritalStatus": {
+        "type": "categorical",
+        "default": 0.0,
+        "categories": ["Divorced", "Married", "Single"],
+        "explanation": "Borrower's marital status.",
+    },
+    "HasMortgage": {
+        "type": "categorical",
+        "default": 1.0,
+        "categories": ["No", "Yes"],
+        "explanation": "Whether the borrower currently has a mortgage.",
+    },
+    "HasDependents": {
+        "type": "categorical",
+        "default": 1.0,
+        "categories": ["No", "Yes"],
+        "explanation": "Whether the borrower has dependents.",
+    },
+    "LoanPurpose": {
+        "type": "categorical",
+        "default": 4.0,
+        "categories": ["Auto", "Business", "Education", "Home", "Other"],
+        "explanation": "The reason for taking out the loan.",
+    },
+    "HasCoSigner": {
+        "type": "categorical",
+        "default": 1.0,
+        "categories": ["No", "Yes"],
+        "explanation": "Whether there is a co-signer on the loan.",
+    },
+    "AffRatio": {
+        "type": "derived",
+        "explanation": "LoanAmount divided by Income, a financial ratio.",
+    },
+    "TotalInterest": {
+        "type": "derived",
+        "explanation": "Total interest paid: InterestRate * LoanTerm.",
+    },
+    "Debt": {"type": "numeric", "default": 37837.36, "explanation": "Total debt."},
+    "AvgBorrowed": {
+        "type": "derived",
+        "explanation": "Average borrowed amount per credit line.",
+    },
+}
+# Gradio input components (with refs)
+input_components = []
+component_refs = {}
+for name, meta in features.items():
+    if meta["type"] == "categorical":
+        dropdown = gr.Dropdown(
+            label=name,
+            choices=meta["categories"],
+            value=meta["categories"][int(meta["default"])],
+            info=meta["explanation"],
+        )
+        input_components.append(dropdown)
+        component_refs[name] = dropdown
+    elif meta["type"] == "numeric":
+        number = gr.Number(label=name, value=meta["default"], info=meta["explanation"])
+        input_components.append(number)
+        component_refs[name] = number
+# Derived components
+input_components += [
+    gr.Number(
+        label="AffRatio",
+        interactive=False,
+        info=features["AffRatio"]["explanation"],
+        value=lambda loan, income: round(loan / income, 5) if income else 0,
+        inputs=[component_refs["LoanAmount"], component_refs["Income"]],
+    ),
+    gr.Number(
+        label="TotalInterest",
+        interactive=False,
+        info=features["TotalInterest"]["explanation"],
+        value=lambda rate, term: round(rate * term, 5),
+        inputs=[component_refs["InterestRate"], component_refs["LoanTerm"]],
+    ),
+    gr.Number(
+        label="DTIRatio",
+        interactive=False,
+        info=features["DTIRatio"]["explanation"],
+        value=lambda debt, income: round(debt / income, 5) if income else 0,
+        inputs=[component_refs["Debt"], component_refs["Income"]],
+    ),
+    gr.Number(
+        label="AvgBorrowed",
+        interactive=False,
+        info=features["AvgBorrowed"]["explanation"],
+        value=lambda loan, lines: round(loan / lines, 5) if lines else 0,
+        inputs=[component_refs["LoanAmount"], component_refs["NumCreditLines"]],
+    ),
+]
+# Inference logic
 def predict_default(*inputs):
+    input_map = {}
+    input_index = 0
+    for name, meta in features.items():
+        if meta["type"] == "derived":
+            continue
+        val = inputs[input_index]
+        if meta["type"] == "categorical":
+            val = meta["categories"].index(val)
+        input_map[name] = val
+        input_index += 1
+    # Recompute derived features (same formulas as the read-only UI fields), then Box-Cox transform AffRatio, AvgBorrowed, TotalInterest and Debt
+    input_map["AffRatio"] = (
+        round(input_map["LoanAmount"] / input_map["Income"], 5)
+        if input_map["Income"]
+        else 0
+    )
+    input_map["TotalInterest"] = round(
+        input_map["InterestRate"] * input_map["LoanTerm"], 5
+    )
+    input_map["DTIRatio"] = (
+        round(input_map["Debt"] / input_map["Income"], 5) if input_map["Income"] else 0
+    )
+    input_map["AvgBorrowed"] = (
+        round(input_map["LoanAmount"] / input_map["NumCreditLines"], 5)
+        if input_map["NumCreditLines"]
+        else 0
+    )
+    derived_cols = ["AffRatio", "AvgBorrowed", "TotalInterest", "Debt"]
+    derived_values_df = pd.DataFrame(
+        [
+            [
+                input_map["AffRatio"],
+                input_map["AvgBorrowed"],
+                input_map["TotalInterest"],
+                input_map["Debt"],
+            ]
+        ],
+        columns=derived_cols,
+    )
+    transformed_derived = pt.transform(derived_values_df).flatten()
+    (
+        input_map["AffRatio"],
+        input_map["AvgBorrowed"],
+        input_map["TotalInterest"],
+        input_map["Debt"],
+    ) = transformed_derived
+    input_row = [input_map[name] for name in features.keys()]
+    input_df = pd.DataFrame([input_row], columns=list(features.keys()))
     scaled = scaler.transform(input_df)
+    # Get probabilities
+    probs = {
+        "Logistic Regression": float(log_reg.predict_proba(scaled)[0][1]),
+        "XGBoost": float(xgb.predict_proba(input_df.values)[0][1]),
+        "FFNN": float(ffnn.predict(scaled, verbose=0)[0][0]),
+    }
+    # Binary labels using 0.5 threshold
+    labels = {
+        model: "Default" if p >= 0.5 else "No Default" for model, p in probs.items()
     }
+    # Create markdown summary for labels
+    label_md = "\n".join(
+        [f"## {model}: *{label}*\n" for model, label in labels.items()]
+    )
+    # Explanatory text for the user
+    explanation_md = (
+        "### Prediction Explanation\n"
+        "Each model predicts the probability that the borrower will default on their loan.\n"
+        "- Probabilities closer to 1 indicate higher risk of default.\n"
+        "- A threshold of 0.5 is used to classify 'Default' vs 'No Default'.\n"
+        "- Consider the results from all models to get a comprehensive view.\n"
+        "\n"
+        "Please use this information as guidance and not a final decision."
+    )
+    # For bar chart: build a DataFrame with one row per model and its default probability
+    bar_data = pd.DataFrame(
+        {
+            "Model": list(probs.keys()),
+            "Default Probability": list(probs.values()),
+        }
+    )
+    return bar_data, label_md, explanation_md
+output_bar = gr.BarPlot(
+    x="Model", y="Default Probability", label="Model Default Probabilities", height=250
+)
+output_labels = gr.Markdown()
+output_explanation = gr.Markdown()
 demo = gr.Interface(
     fn=predict_default,
     inputs=input_components,
+    outputs=[output_bar, output_labels, output_explanation],
     title="Loan Default Risk Predictor",
     description="Enter borrower info and see the default risk prediction from 3 models.",
     flagging_mode="never",

models/boxcox_transformer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0af89d68933bda5628d46608cc9db072bcffe83ad33004c335cbedfb70494f17
+size 1245

models/ffnn_model.keras CHANGED Viewed

Binary files a/models/ffnn_model.keras and b/models/ffnn_model.keras differ

models/xgboost_model.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d546ba56110501c951797c7758c516ab7f4523e2ee43778f1590eb081155de89
 size 177107

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9c79b7401355a2c2c05ca0e286975930ee934f5d583583d9ccf494a50c3ac38
 size 177107

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 gradio==5.41.0
 joblib==1.5.1
-numpy==2.2.6
-pandas==2.3.1
-tensorflow==2.20.0rc0

 gradio==5.41.0
 joblib==1.5.1
+numpy==2.0.2
+pandas==2.2.2
+tensorflow==2.19.0