"""Gradio app for loan-default prediction on preselected CSV files.

Loads pre-fitted preprocessing transformers and two classifiers from disk,
then serves a small UI: the user picks a CSV file and a model, and gets
per-row predictions with confidence scores.

NOTE: ``CustomImputer`` and ``CustomCleaner`` must be defined in this module
so the pickled pipelines (which reference them by qualified name) can be
unpickled by ``joblib.load``.
"""

import gradio as gr
import pandas as pd
import numpy as np
import joblib
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin


# ----------------------------
# Custom Numeric Imputer
# ----------------------------
class CustomImputer(BaseEstimator, TransformerMixin):
    """Impute numeric loan features using group means learned at fit time.

    ``fit`` memorises per-group and global means; ``transform`` fills missing
    values from those *fitted* statistics, so transforming new (possibly
    single-row) data does not depend on the composition of the batch being
    transformed.

    Inherits ``BaseEstimator`` (like ``CustomCleaner``) so get_params/
    set_params work with sklearn tooling.
    """

    def fit(self, X, y=None):
        """Precompute the group/global means used later by ``transform``.

        Parameters
        ----------
        X : pandas.DataFrame
            Training data containing at least the columns referenced below.
        y : ignored
            Present for sklearn API compatibility.
        """
        self.group_means = {
            'rate_of_interest': X.groupby(['loan_type', 'term'])['rate_of_interest'].mean(),
            'Interest_rate_spread': X.groupby(['loan_type', 'term'])['Interest_rate_spread'].mean(),
            'Upfront_charges': X.groupby(['loan_type', 'term'])['Upfront_charges'].mean(),
            'rate_of_interest_loan': X.groupby(['loan_type'])['rate_of_interest'].mean(),
            'Interest_rate_spread_loan': X.groupby(['loan_type'])['Interest_rate_spread'].mean(),
            'Upfront_charges_loan': X.groupby(['loan_type'])['Upfront_charges'].mean(),
            'income_by_age': X.groupby(['age'])['income'].mean(),
            'property_value_mean': X['property_value'].mean(),
            'dtir1_mean': X['dtir1'].mean(),
            'income_mean': X['income'].mean(),
        }
        return self

    @staticmethod
    def _pair_means(means, X):
        """Look up the fitted (loan_type, term) group mean for each row.

        Returns a float Series aligned to ``X.index``; NaN for groups that
        were not seen at fit time.
        """
        lut = means.to_dict()  # keys are (loan_type, term) tuples
        values = [lut.get(key, np.nan) for key in zip(X['loan_type'], X['term'])]
        return pd.Series(values, index=X.index, dtype=float)

    def transform(self, X):
        """Return a copy of ``X`` with missing numeric features imputed."""
        X = X.copy()

        # Rate-like columns: fill from the fitted (loan_type, term) group
        # means, then fall back to the fitted loan_type means for groups that
        # were unseen (or all-NaN) at fit time.
        # BUG FIX: the original recomputed group means from the batch being
        # transformed (ignoring the fitted self.group_means), so imputed
        # values depended on which rows happened to be in the batch. It also
        # used a per-row .apply(axis=1) where a vectorized map is equivalent.
        for col in ['rate_of_interest', 'Interest_rate_spread', 'Upfront_charges']:
            decimals = 3 if col == 'rate_of_interest' else 4
            X[col] = X[col].fillna(self._pair_means(self.group_means[col], X)).round(decimals)
            loan_fallback = X['loan_type'].map(self.group_means[col + '_loan'])
            X[col] = X[col].fillna(loan_fallback).round(decimals)

        # property_value: fitted global mean, rounded to the nearest thousand.
        X['property_value'] = X['property_value'].fillna(self.group_means['property_value_mean'])
        X['property_value'] = np.round(X['property_value'], -3)

        # dtir1: fitted global mean, rounded to an integer.
        X['dtir1'] = X['dtir1'].fillna(self.group_means['dtir1_mean']).round(0)

        # income: fitted per-age mean, then fitted global mean; rounded to the
        # nearest hundred.
        X['income'] = X['income'].fillna(X['age'].map(self.group_means['income_by_age']))
        X['income'] = X['income'].fillna(self.group_means['income_mean'])
        X['income'] = np.round(X['income'], -2)

        # LTV: reconstruct from loan amount and property value when missing.
        X['LTV'] = X['LTV'].fillna(X['loan_amount'] / X['property_value'] * 100).round(8)
        return X


# ----------------------------
# Custom Categorical Cleaner
# ----------------------------
class CustomCleaner(BaseEstimator, TransformerMixin):
    """Drop unwanted columns, fix known typos, and normalise missing markers.

    Parameters
    ----------
    drop_cols : list of str, optional
        Columns to drop entirely.
    missing_placeholders : list, optional
        Values to be replaced by NaN in the categorical columns.
        Defaults to ['', 'NA', 'nan', 'NaN'].
    cat_cols : list of str, optional
        Categorical columns in which to replace the placeholders.
    """

    def __init__(self, drop_cols=None, missing_placeholders=None, cat_cols=None):
        self.drop_cols = drop_cols
        self.missing_placeholders = missing_placeholders if missing_placeholders is not None else ['', 'NA', 'nan', 'NaN']
        self.cat_cols = cat_cols

    def fit(self, X, y=None):
        """Stateless transformer; nothing to learn."""
        return self

    def transform(self, X):
        """Return a cleaned copy of ``X``."""
        X = X.copy()
        if self.drop_cols:
            X = X.drop(self.drop_cols, axis=1)
        # Repair a known data-entry typo in the source data.
        if 'Security_Type' in X.columns:
            X['Security_Type'] = X['Security_Type'].replace({'Indriect': 'Indirect'})
        if self.cat_cols:
            for col in self.cat_cols:
                if col in X.columns:
                    X[col] = X[col].replace(self.missing_placeholders, np.nan)
        return X


# ----------------------------
# Load models and preprocessing pipelines
# ----------------------------
# NOTE: joblib.load unpickles arbitrary code — only load trusted local files.
gb_loaded = joblib.load('gradient_boosting_model.pkl')
rf_loaded = joblib.load("random_forest_model.pkl")
num_pipeline = joblib.load('num_pipeline.pkl')          # numeric imputer pipeline
custom_cleaner = joblib.load('custom_cleaner.pkl')      # custom cleaning transformer
cat_preprocessing = joblib.load('cat_preprocessing.pkl')  # categorical preprocessing

# ----------------------------
# Predefined CSV file options
# ----------------------------
csv_files = {
    "Default 1": "Default_1.csv",
    "Default 2": "Default_2.csv",
    "Non Default": "Non_default.csv"
}


# ----------------------------
# Prediction function
# ----------------------------
def predict_csv_from_dropdown(file_choice, model_choice):
    """Run one of the predefined CSVs through the pipeline and a model.

    Parameters
    ----------
    file_choice : str
        Key into ``csv_files`` selecting which CSV to score.
    model_choice : str
        "Random Forest" selects the RF model; anything else selects the
        gradient-boosting model.

    Returns
    -------
    pandas.DataFrame
        One row per scored input row, with 'Prediction' ('Default' /
        'Non-default') and 'Confidence' (max class probability) columns.
    """
    # Read CSV based on dropdown choice
    file_path = csv_files[file_choice]
    df = pd.read_csv(file_path)

    # Keep only rows with a known 'term' (required by the group imputations).
    df_cleaned = df[df['term'].notnull()].copy()

    # Drop target if it exists (e.g. when scoring labelled files).
    if 'target' in df_cleaned.columns:
        df_cleaned = df_cleaned.drop(columns=['target'])

    # Apply the preprocessing stages in the same order used at training time.
    X_num = num_pipeline.transform(df_cleaned)
    X_cleaned = custom_cleaner.transform(X_num)
    X_processed = cat_preprocessing.transform(X_cleaned)

    # Select model
    model = rf_loaded if model_choice == "Random Forest" else gb_loaded

    # Predict; confidence is the highest class probability per row.
    preds = model.predict(X_processed)
    probs = model.predict_proba(X_processed).max(axis=1)

    # Convert numeric classes to readable labels (0 = Non-default).
    labels = ['Non-default' if c == 0 else 'Default' for c in preds]

    results = pd.DataFrame({
        'Prediction': labels,
        'Confidence': probs
    })
    return results


# ----------------------------
# Gradio Interface
# ----------------------------
iface = gr.Interface(
    fn=predict_csv_from_dropdown,
    inputs=[
        gr.Dropdown(choices=list(csv_files.keys()), label="Select CSV File"),
        gr.Dropdown(choices=["Random Forest", "Gradient Boosting"], label="Select Model")
    ],
    outputs=gr.Dataframe(headers=["Prediction", "Confidence"]),
    title="Loan Default Prediction",
    description="Select a CSV file and model to predict whether the applicant will Default (1) or Non-default (0) the loan."
)

if __name__ == "__main__":
    iface.launch()