Spaces:
Sleeping
Sleeping
File size: 6,158 Bytes
b00d985 73ec2e0 50b44af 73ec2e0 76afcf0 73ec2e0 0adbd62 80f2d4a 0adbd62 76afcf0 73ec2e0 0adbd62 b00d985 73ec2e0 b00d985 73ec2e0 b00d985 73ec2e0 b00d985 835afa7 73ec2e0 0adbd62 b00d985 835afa7 0adbd62 b00d985 73ec2e0 b00d985 73ec2e0 b00d985 0adbd62 835afa7 0adbd62 835afa7 b00d985 835afa7 0adbd62 b00d985 0adbd62 73ec2e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
import gradio as gr
import pandas as pd
import numpy as np
import joblib
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
# ----------------------------
# Custom Numeric Imputer
# ----------------------------
class CustomImputer(TransformerMixin, BaseEstimator):
    """Fill missing numeric loan features using statistics learned in ``fit``.

    ``fit`` records group means and overall means from the training frame;
    ``transform`` fills gaps from those stored values only, so the imputed
    value for a row does not depend on which other rows share its batch.

    The input must be a pandas DataFrame containing the columns
    ``loan_type``, ``term``, ``age``, ``rate_of_interest``,
    ``Interest_rate_spread``, ``Upfront_charges``, ``property_value``,
    ``dtir1``, ``income``, ``LTV`` and ``loan_amount``.

    ``BaseEstimator`` is inherited (after the mixin) so the transformer
    exposes ``get_params``/``set_params`` and can be cloned like the other
    custom transformer in this file.
    """

    # Columns filled from the (loan_type, term) mean, then the loan_type mean.
    _RATE_COLS = ('rate_of_interest', 'Interest_rate_spread', 'Upfront_charges')

    def fit(self, X, y=None):
        """Learn the imputation statistics from ``X``.

        Args:
            X: Training DataFrame with the columns listed on the class.
            y: Ignored; accepted for pipeline compatibility.

        Returns:
            self
        """
        by_type_term = X.groupby(['loan_type', 'term'])
        by_type = X.groupby(['loan_type'])

        # Keys are kept exactly as before so already-fitted, persisted
        # instances remain usable with this class definition.
        stats = {}
        for col in self._RATE_COLS:
            stats[col] = by_type_term[col].mean()
            stats[col + '_loan'] = by_type[col].mean()
        stats['income_by_age'] = X.groupby(['age'])['income'].mean()
        stats['property_value_mean'] = X['property_value'].mean()
        stats['dtir1_mean'] = X['dtir1'].mean()
        stats['income_mean'] = X['income'].mean()

        self.group_means = stats
        return self

    def transform(self, X):
        """Return a copy of ``X`` with missing numeric values imputed.

        Args:
            X: DataFrame with the columns listed on the class.

        Returns:
            A new DataFrame; ``X`` itself is not modified.
        """
        X = X.copy()
        stats = self.group_means

        for col in self._RATE_COLS:
            decimals = 3 if col == 'rate_of_interest' else 4

            # First choice: fitted mean of the row's (loan_type, term) group.
            # A plain dict keyed by tuples keeps the lookup independent of
            # int/float differences in ``term`` between training and inference.
            pair_means = stats[col].to_dict()
            by_pair = pd.Series(
                [
                    pair_means.get(key, np.nan)
                    for key in zip(X['loan_type'], X['term'])
                ],
                index=X.index,
                dtype=float,
            )
            # Second choice: fitted mean of the row's loan_type alone.
            by_type = X['loan_type'].map(stats[col + '_loan'])

            X[col] = X[col].fillna(by_pair).fillna(by_type).round(decimals)

        # Impute property_value and dtir1 from the fitted overall means.
        X['property_value'] = X['property_value'].fillna(stats['property_value_mean'])
        X['property_value'] = np.round(X['property_value'], -3)  # nearest 1000
        X['dtir1'] = X['dtir1'].fillna(stats['dtir1_mean']).round(0)

        # Income: fitted mean for the row's age group, then the overall mean.
        by_age = X['age'].map(stats['income_by_age'])
        X['income'] = X['income'].fillna(by_age).fillna(stats['income_mean'])
        X['income'] = np.round(X['income'], -2)  # nearest 100

        # LTV is derived from the (now complete) property value when missing.
        derived_ltv = X['loan_amount'] / X['property_value'] * 100
        X['LTV'] = X['LTV'].fillna(derived_ltv).round(8)
        return X
# ----------------------------
# Custom Categorical Cleaner
# ----------------------------
class CustomCleaner(BaseEstimator, TransformerMixin):
    """Stateless clean-up step applied before categorical preprocessing.

    Args:
        drop_cols: Column label(s) to remove; nothing is dropped when falsy.
        missing_placeholders: Values treated as missing in ``cat_cols``.
            Defaults to ``['', 'NA', 'nan', 'NaN']`` when ``None``.
        cat_cols: Columns in which placeholders are replaced by NaN; columns
            absent from the frame are skipped.
    """

    def __init__(self, drop_cols=None, missing_placeholders=None, cat_cols=None):
        if missing_placeholders is None:
            missing_placeholders = ['', 'NA', 'nan', 'NaN']
        self.drop_cols = drop_cols
        self.missing_placeholders = missing_placeholders
        self.cat_cols = cat_cols

    def fit(self, X, y=None):
        """Nothing is learned; return the transformer unchanged."""
        return self

    def transform(self, X):
        """Return a cleaned copy of ``X``; the input frame is not modified."""
        cleaned = X.copy()

        if self.drop_cols:
            cleaned = cleaned.drop(columns=self.drop_cols)

        # Normalise a known misspelling in the Security_Type values.
        if 'Security_Type' in cleaned.columns:
            spelling_fix = {'Indriect': 'Indirect'}
            cleaned['Security_Type'] = cleaned['Security_Type'].replace(spelling_fix)

        if not self.cat_cols:
            return cleaned

        # Turn placeholder strings into real NaN in the categorical columns
        # that are actually present.
        present = [name for name in self.cat_cols if name in cleaned.columns]
        for name in present:
            cleaned[name] = cleaned[name].replace(self.missing_placeholders, np.nan)
        return cleaned
# ----------------------------
# Load models and preprocessing pipelines
# ----------------------------
# Load every persisted artifact in one pass, in the order the names are listed.
# NOTE(review): the pickles presumably reference the custom transformer classes
# defined above, so this must stay below those class definitions — confirm.
(
    gb_loaded,
    rf_loaded,
    num_pipeline,       # numeric imputer pipeline
    custom_cleaner,     # custom cleaning transformer
    cat_preprocessing,  # categorical preprocessing
) = map(
    joblib.load,
    (
        'gradient_boosting_model.pkl',
        "random_forest_model.pkl",
        'num_pipeline.pkl',
        'custom_cleaner.pkl',
        'cat_preprocessing.pkl',
    ),
)
# ----------------------------
# Predefined CSV file options
# ----------------------------
# Dropdown label -> CSV path; insertion order is the order shown in the UI.
csv_files = {
    "Default 1": "Default_1.csv",
    "Default 2": "Default_2.csv",
    "Non Default": "Non_default.csv",
}
# ----------------------------
# Prediction function
# ----------------------------
def predict_csv_from_dropdown(file_choice, model_choice):
    """Score the rows of a predefined CSV file with the selected model.

    Args:
        file_choice: A key of ``csv_files`` chosen in the UI.
        model_choice: ``"Random Forest"`` selects the random forest; any
            other value selects the gradient boosting model.

    Returns:
        A DataFrame with one row per scored input row and two columns:
        ``Prediction`` (``'Default'`` / ``'Non-default'``) and ``Confidence``
        (the highest class probability reported by the model). Rows whose
        ``term`` is missing are not scored.

    Raises:
        gr.Error: If ``file_choice`` is not one of the offered files, e.g.
            when the form is submitted with the dropdown left empty.
    """
    # The dropdown has no default value, so an untouched form submits None;
    # report that to the user instead of failing with a bare KeyError.
    if file_choice not in csv_files:
        raise gr.Error("Please select one of the available CSV files.")

    df = pd.read_csv(csv_files[file_choice])

    # Keep rows with a known 'term' and remove the label column if present.
    rows = df[df['term'].notnull()].drop(columns=['target'], errors='ignore')

    # Nothing left to score: return an empty table with the expected columns
    # rather than pushing an empty frame through the pipelines.
    if rows.empty:
        return pd.DataFrame({'Prediction': [], 'Confidence': []})

    # Numeric imputation -> custom cleaning -> categorical preprocessing.
    X_num = num_pipeline.transform(rows)
    X_cleaned = custom_cleaner.transform(X_num)
    X_processed = cat_preprocessing.transform(X_cleaned)

    model = rf_loaded if model_choice == "Random Forest" else gb_loaded

    preds = model.predict(X_processed)
    # Confidence is the probability of the most likely class for each row.
    probs = model.predict_proba(X_processed).max(axis=1)

    # Class 0 is reported as non-default, every other class as default.
    labels = ['Non-default' if c == 0 else 'Default' for c in preds]
    return pd.DataFrame({
        'Prediction': labels,
        'Confidence': probs,
    })
# ----------------------------
# Gradio Interface
# ----------------------------
# Two dropdowns in (which CSV, which model), one results table out.
iface = gr.Interface(
    title="Loan Default Prediction",
    description="Select a CSV file and model to predict whether the applicant will Default (1) or Non-default (0) the loan.",
    fn=predict_csv_from_dropdown,
    inputs=[
        gr.Dropdown(label="Select CSV File", choices=list(csv_files)),
        gr.Dropdown(label="Select Model", choices=["Random Forest", "Gradient Boosting"]),
    ],
    outputs=gr.Dataframe(headers=["Prediction", "Confidence"]),
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
|