File size: 6,158 Bytes
b00d985
 
 
 
73ec2e0
 
 
 
 
 
50b44af
73ec2e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76afcf0
73ec2e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0adbd62
80f2d4a
 
 
0adbd62
76afcf0
73ec2e0
 
 
0adbd62
 
 
 
b00d985
 
73ec2e0
b00d985
73ec2e0
b00d985
 
 
73ec2e0
b00d985
 
 
 
 
 
 
835afa7
73ec2e0
0adbd62
b00d985
 
835afa7
0adbd62
b00d985
73ec2e0
b00d985
 
 
 
 
 
 
 
 
73ec2e0
 
 
b00d985
0adbd62
835afa7
0adbd62
835afa7
 
b00d985
835afa7
0adbd62
b00d985
 
 
 
0adbd62
73ec2e0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import gradio as gr
import pandas as pd
import numpy as np
import joblib
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin

# ----------------------------
# Custom Numeric Imputer
# ----------------------------
class CustomImputer(TransformerMixin):
    """Impute missing numeric loan features from statistics learned in fit().

    fit() stores group/global means computed on the training data; transform()
    reuses those fitted statistics. (The previous implementation recomputed
    group means on the transform batch, so a single-row inference batch — the
    app's actual use case — produced an all-NaN group mean and the primary
    imputation silently did nothing, falling through to the coarser
    loan-level fallback. It was also train/serve inconsistent.)
    """

    # Columns imputed from (loan_type, term) means, then loan_type-level means.
    _GROUP_COLS = ('rate_of_interest', 'Interest_rate_spread', 'Upfront_charges')

    def fit(self, X, y=None):
        """Precompute the means used by transform().

        Parameters
        ----------
        X : pandas.DataFrame with at least the columns referenced below.
        y : ignored, present for pipeline compatibility.

        Returns self.
        """
        # Keep the exact key layout of the original implementation so that
        # instances already pickled with joblib keep working unchanged.
        self.group_means = {
            'rate_of_interest': X.groupby(['loan_type', 'term'])['rate_of_interest'].mean(),
            'Interest_rate_spread': X.groupby(['loan_type', 'term'])['Interest_rate_spread'].mean(),
            'Upfront_charges': X.groupby(['loan_type', 'term'])['Upfront_charges'].mean(),
            'rate_of_interest_loan': X.groupby(['loan_type'])['rate_of_interest'].mean(),
            'Interest_rate_spread_loan': X.groupby(['loan_type'])['Interest_rate_spread'].mean(),
            'Upfront_charges_loan': X.groupby(['loan_type'])['Upfront_charges'].mean(),
            'income_by_age': X.groupby(['age'])['income'].mean(),
            'property_value_mean': X['property_value'].mean(),
            'dtir1_mean': X['dtir1'].mean(),
            'income_mean': X['income'].mean(),
        }
        return self

    def transform(self, X):
        """Return a copy of X with missing numeric values filled.

        Assumes 'term' is non-null on every row (the app filters for this
        before calling the pipeline) — TODO confirm for other callers.
        """
        X = X.copy()

        # Look up the fitted (loan_type, term) mean for every row at once.
        pair_index = pd.MultiIndex.from_arrays([X['loan_type'], X['term']])
        for col in self._GROUP_COLS:
            decimals = 3 if col == 'rate_of_interest' else 4
            group_fill = pd.Series(
                self.group_means[col].reindex(pair_index).to_numpy(), index=X.index
            )
            X[col] = X[col].fillna(group_fill)
            # Fallback for (loan_type, term) pairs unseen during fit:
            # the coarser fitted loan_type-level mean.
            X[col] = X[col].fillna(X['loan_type'].map(self.group_means[col + '_loan']))
            X[col] = X[col].round(decimals)

        # Global-mean imputations, rounded as in training.
        X['property_value'] = X['property_value'].fillna(self.group_means['property_value_mean'])
        X['property_value'] = np.round(X['property_value'], -3)  # nearest thousand

        X['dtir1'] = X['dtir1'].fillna(self.group_means['dtir1_mean']).round(0)

        # Income: fitted per-age-band mean, then fitted global mean.
        X['income'] = X['income'].fillna(X['age'].map(self.group_means['income_by_age']))
        X['income'] = X['income'].fillna(self.group_means['income_mean'])
        X['income'] = np.round(X['income'], -2)  # nearest hundred

        # LTV derived from the (already imputed) property value.
        X['LTV'] = X['LTV'].fillna(X['loan_amount'] / X['property_value'] * 100).round(8)

        return X

# ----------------------------
# Custom Categorical Cleaner
# ----------------------------
class CustomCleaner(BaseEstimator, TransformerMixin):
    """Stateless cleaning step for the categorical loan columns.

    Drops configured columns, repairs the known 'Indriect' data-entry typo
    in Security_Type, and normalizes placeholder strings to NaN in the
    configured categorical columns.
    """

    def __init__(self, drop_cols=None, missing_placeholders=None, cat_cols=None):
        # Columns to remove entirely (None means drop nothing).
        self.drop_cols = drop_cols
        # Strings that should be treated as missing values.
        if missing_placeholders is None:
            missing_placeholders = ['', 'NA', 'nan', 'NaN']
        self.missing_placeholders = missing_placeholders
        # Categorical columns on which placeholder normalization is applied.
        self.cat_cols = cat_cols

    def fit(self, X, y=None):
        # Nothing to learn; present only for pipeline compatibility.
        return self

    def transform(self, X):
        cleaned = X.copy()

        if self.drop_cols:
            cleaned = cleaned.drop(self.drop_cols, axis=1)

        # Fix a known typo in the source data before any encoding happens.
        if 'Security_Type' in cleaned.columns:
            cleaned['Security_Type'] = cleaned['Security_Type'].replace({'Indriect': 'Indirect'})

        if self.cat_cols:
            for column in (c for c in self.cat_cols if c in cleaned.columns):
                cleaned[column] = cleaned[column].replace(self.missing_placeholders, np.nan)

        return cleaned

# ----------------------------
# Load models and preprocessing pipelines
# ----------------------------
# NOTE(review): joblib.load runs at import time, so a missing .pkl file
# crashes the app on startup. The custom transformer classes above must be
# defined under the same module/name used when these artifacts were pickled,
# or unpickling will fail.
gb_loaded = joblib.load('gradient_boosting_model.pkl')
rf_loaded = joblib.load("random_forest_model.pkl")
num_pipeline = joblib.load('num_pipeline.pkl')       # numeric imputer pipeline
custom_cleaner = joblib.load('custom_cleaner.pkl')   # custom cleaning transformer
cat_preprocessing = joblib.load('cat_preprocessing.pkl')  # categorical preprocessing

# ----------------------------
# Predefined CSV file options
# ----------------------------
# Maps the dropdown label shown in the UI to the CSV path read at predict time.
csv_files = {
    "Default 1": "Default_1.csv",
    "Default 2": "Default_2.csv",
    "Non Default": "Non_default.csv"
}

# ----------------------------
# Prediction function
# ----------------------------
def predict_csv_from_dropdown(file_choice, model_choice):
    """Score every row of the selected CSV with the selected model.

    Parameters
    ----------
    file_choice : str
        A key of ``csv_files`` (set by the UI dropdown).
    model_choice : str
        "Random Forest" or "Gradient Boosting".

    Returns
    -------
    pandas.DataFrame with 'Prediction' ('Default'/'Non-default') and
    'Confidence' (probability of the predicted class) columns.
    """
    frame = pd.read_csv(csv_files[file_choice])

    # Rows without a loan term cannot be imputed downstream; drop them.
    frame = frame[frame['term'].notnull()].copy()

    # The label column must not reach the preprocessing pipeline.
    if 'target' in frame.columns:
        frame = frame.drop(columns=['target'])

    # Apply the fitted preprocessing stages in training order:
    # numeric imputation -> categorical cleaning -> categorical encoding.
    features = cat_preprocessing.transform(
        custom_cleaner.transform(num_pipeline.transform(frame))
    )

    if model_choice == "Random Forest":
        model = rf_loaded
    else:
        model = gb_loaded

    predictions = model.predict(features)
    # Confidence = probability the model assigns to its own prediction.
    confidences = model.predict_proba(features).max(axis=1)

    return pd.DataFrame({
        'Prediction': ['Non-default' if p == 0 else 'Default' for p in predictions],
        'Confidence': confidences
    })

# ----------------------------
# Gradio Interface
# ----------------------------
# Two dropdowns (CSV choice, model choice) feed predict_csv_from_dropdown;
# the output table shows one row per scored loan application.
iface = gr.Interface(
    fn=predict_csv_from_dropdown,
    inputs=[
        gr.Dropdown(choices=list(csv_files.keys()), label="Select CSV File"),
        gr.Dropdown(choices=["Random Forest", "Gradient Boosting"], label="Select Model")
    ],
    outputs=gr.Dataframe(headers=["Prediction", "Confidence"]),
    title="Loan Default Prediction",
    description="Select a CSV file and model to predict whether the applicant will Default (1) or Non-default (0) the loan."
)

if __name__ == "__main__":
    # Launch the local Gradio server only when run as a script.
    iface.launch()